Background

This document is to provide some plotting examples for reference.


Example #1: US State and County Maps

The package usmap contains maps of US states and counties. There is also some associated data available about state and county demographics.

Example code includes:

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Population by county
data(countypop, package="usmap")
# Population by state
data(statepop, package="usmap")
# Poverty rate by county
data(countypov, package="usmap")
# Poverty rate by state
data(statepov, package="usmap")
# Population of largest city by state
data(citypop, package="usmap")
# Location of earthquakes
data(earthquakes, package="usmap")


# Included datasets
countypop
## # A tibble: 3,142 x 4
##    fips  abbr  county          pop_2015
##    <chr> <chr> <chr>              <dbl>
##  1 01001 AL    Autauga County     55347
##  2 01003 AL    Baldwin County    203709
##  3 01005 AL    Barbour County     26489
##  4 01007 AL    Bibb County        22583
##  5 01009 AL    Blount County      57673
##  6 01011 AL    Bullock County     10696
##  7 01013 AL    Butler County      20154
##  8 01015 AL    Calhoun County    115620
##  9 01017 AL    Chambers County    34123
## 10 01019 AL    Cherokee County    25859
## # ... with 3,132 more rows
statepop
## # A tibble: 51 x 4
##    fips  abbr  full                 pop_2015
##    <chr> <chr> <chr>                   <dbl>
##  1 01    AL    Alabama               4858979
##  2 02    AK    Alaska                 738432
##  3 04    AZ    Arizona               6828065
##  4 05    AR    Arkansas              2978204
##  5 06    CA    California           39144818
##  6 08    CO    Colorado              5456574
##  7 09    CT    Connecticut           3590886
##  8 10    DE    Delaware               945934
##  9 11    DC    District of Columbia   672228
## 10 12    FL    Florida              20271272
## # ... with 41 more rows
countypov
## # A tibble: 3,142 x 4
##    fips  abbr  county          pct_pov_2014
##    <chr> <chr> <chr>                  <dbl>
##  1 01001 AL    Autauga County          13.1
##  2 01003 AL    Baldwin County          13  
##  3 01005 AL    Barbour County          25.4
##  4 01007 AL    Bibb County             18.1
##  5 01009 AL    Blount County           17.5
##  6 01011 AL    Bullock County          35.1
##  7 01013 AL    Butler County           25  
##  8 01015 AL    Calhoun County          20.5
##  9 01017 AL    Chambers County         21.3
## 10 01019 AL    Cherokee County         18.6
## # ... with 3,132 more rows
statepov
## # A tibble: 51 x 4
##    fips  abbr  full                 pct_pov_2014
##    <chr> <chr> <chr>                       <dbl>
##  1 01    AL    Alabama                      19.2
##  2 02    AK    Alaska                       11.4
##  3 04    AZ    Arizona                      18.2
##  4 05    AR    Arkansas                     18.7
##  5 06    CA    California                   16.4
##  6 08    CO    Colorado                     12.1
##  7 09    CT    Connecticut                  10.8
##  8 10    DE    Delaware                     13  
##  9 11    DC    District of Columbia         18.4
## 10 12    FL    Florida                      16.6
## # ... with 41 more rows
citypop
## # A tibble: 51 x 6
##       lon   lat state                abbr  most_populous_city city_pop
##     <dbl> <dbl> <chr>                <chr> <chr>                 <dbl>
##  1  -86.8  33.6 Alabama              AL    Birmingham           212237
##  2 -150.   61.2 Alaska               AK    Anchorage            291826
##  3 -112.   33.4 Arizona              AZ    Phoenix             1445632
##  4  -92.3  34.7 Arkansas             AR    Little Rock          193524
##  5 -118.   34.0 California           CA    Los Angeles         3792621
##  6 -105.   39.8 Colorado             CO    Denver               600158
##  7  -73.2  41.2 Connecticut          CT    Bridgeport           144229
##  8  -75.6  39.8 Delaware             DE    Wilmington            70851
##  9  -77.0  38.9 District of Columbia DC    Washington           693972
## 10  -81.7  30.3 Florida              FL    Jacksonville         880619
## # ... with 41 more rows
earthquakes
## # A tibble: 2,254 x 3
##      lon   lat   mag
##    <dbl> <dbl> <dbl>
##  1 -118.  35.7  2.79
##  2 -118.  36.3  3.1 
##  3 -118.  36.2  2.74
##  4 -125.  40.4  2.77
##  5 -118.  35.7  2.59
##  6 -118.  35.7  2.82
##  7 -118.  35.7  2.78
##  8 -124.  40.3  3.42
##  9 -124.  41.7  2.5 
## 10 -119.  35.3  2.58
## # ... with 2,244 more rows
# Basic, empty US maps
usmap::plot_usmap(regions="states")

usmap::plot_usmap(regions="counties")

# Basic, empty US maps subsetted to an area
usmap::plot_usmap(regions="states", 
                  include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM")
                  )

usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"))

# Basic, subsetted state map with poverty rates included
usmap::plot_usmap(regions="states", 
                  include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM"), 
                  values="pct_pov_2014", data=statepov, labels=TRUE
                  ) + 
    scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") + 
    labs(title="Poverty Rates by Western and Mountain States")

# Basic, subsetted county map with poverty rates included
usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"),
                  values="pct_pov_2014", data=countypov, labels=FALSE
                  ) + 
    scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") + 
    labs(title="Poverty Rates by County in Great Lakes States")

Example #2: Converting and adding lat/lon data

The latitude and longitude data can be converted to a form suitable for usmap by using the usmap_transform function.

Example code includes:

# Transform the earthquakes data
trQuakes <- usmap::usmap_transform(earthquakes)
str(trQuakes)
## 'data.frame':    2254 obs. of  5 variables:
##  $ lon  : num  -118 -118 -118 -125 -118 ...
##  $ lat  : num  35.7 36.3 36.2 40.4 35.7 ...
##  $ mag  : num  2.79 3.1 2.74 2.77 2.59 2.82 2.78 3.42 2.5 2.58 ...
##  $ lon.1: num  -1575319 -1632293 -1595342 -2065325 -1574008 ...
##  $ lat.1: num  -872720 -785212 -811800 -197614 -867723 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") + 
    geom_point(data=trQuakes, aes(x=lon.1, y=lat.1, size=mag), alpha=0.4) + 
    labs(title="Earthquakes of Magnitude 2.5+ (H1 2019)")

# Transform the largest city data
trCity <- usmap::usmap_transform(citypop)
str(trCity)
## 'data.frame':    51 obs. of  8 variables:
##  $ lon               : num  -86.8 -112.1 -92.3 -118.2 -104.9 ...
##  $ lat               : num  33.6 33.5 34.7 34 39.8 ...
##  $ state             : chr  "Alabama" "Arizona" "Arkansas" "California" ...
##  $ abbr              : chr  "AL" "AZ" "AR" "CA" ...
##  $ most_populous_city: chr  "Birmingham" "Phoenix" "Little Rock" "Los Angeles" ...
##  $ city_pop          : num  212237 1445632 193524 3792621 600158 ...
##  $ lon.1             : num  1220905 -1120927 702478 -1673156 -417275 ...
##  $ lat.1             : num  -1165156 -1202575 -1107534 -1034842 -570173 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") + 
    geom_point(data=trCity, aes(x=lon.1, y=lat.1, size=city_pop)) + 
    labs(title="Largest City by State")

Example #3: Filtering and coloring by region

The census region definitions are included, and can be used to filter or color the maps.

Example code includes:

# Filter the map to include only new_england, mid_atlantic, and south_atlantic
usmap::plot_usmap(regions="states", 
                  include=c(usmap::.new_england, usmap::.mid_atlantic, usmap::.south_atlantic)
                  )

# Create regions data for US states
regionData <- usmap::statepop %>%
    mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0)))
usmap::plot_usmap(regions="states", data=regionData, values="region") + 
    scale_fill_discrete("Midwest") + 
    labs(title="Midwest Region US States")

# Enhanced Coloring and Labelling
usmap::plot_usmap(regions="states", data=regionData, values="region") + 
    scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) + 
    labs(title="Midwest Region US States")

Example #4: Labelling geographies

Since usmap is built on ggplot2, the standard techniques from ggplot2 can be used to enhance the geography labelling. Further, centroids for the geographies are available in loadable files.

Example code includes:

# Base state map labelled with defaults
usmap::plot_usmap(regions="states", labels=TRUE, label_color="red")

# Base county map labelled with defaults
usmap::plot_usmap(regions="counties", include=c("TX", "OK"), labels=TRUE, label_color="grey")

# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "states", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
                            )

# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "counties", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
                            )

# Add state labels using geom_text
regionData <- usmap::statepop %>%
    mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0))) %>%
    left_join(stCenter %>% select(x, y, full, fips) %>% rename(fname=full)) %>%
    mutate(fname=ifelse(fname=="District of Columbia", "DC", str_replace_all(fname, " ", "\n")))
## Joining, by = "fips"
usmap::plot_usmap(regions="states", data=regionData[, c("fips", "region")], values="region") + 
    scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) + 
    labs(title="Midwest Region US States") + 
    geom_text(data=filter(regionData, region==1), aes(x=x, y=y, label=fname), size=2.5)

# Add county labels using geom_text
regionData <- usmap::countypop %>%
    mutate(region=as.factor(case_when(abbr=="OK" ~ 1, abbr=="TX" ~ 2, TRUE ~ 0))) %>%
    left_join(ctCenter %>% select(x, y, county, fips) %>% rename(cname=county)) %>%
    mutate(cname=str_replace_all(str_replace(cname, " County", ""), " ", "\n"))
## Joining, by = "fips"
usmap::plot_usmap(regions="counties", include=c("TX", "OK"), 
                  data=regionData[, c("fips", "region")], values="region") + 
    scale_fill_manual(values=c("red", "orange"), "", labels=c("Oklahoma", "Texas")) + 
    labs(title="Texas and Oklahoma Counties") + 
    geom_text(data=filter(regionData, abbr %in% c("TX", "OK")), 
              aes(x=x, y=y, label=cname), size=2.5, 
              color=ifelse(pull(filter(regionData, abbr %in% c("TX", "OK")), abbr)=="OK", "white", "black")
              )

Example #5: Adding population centers

Separate data exists for key population centers, which can be loaded and then added to maps.

Example code includes:

# Transform the largest city data
str(maps::us.cities)
## 'data.frame':    1005 obs. of  6 variables:
##  $ name       : chr  "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
##  $ country.etc: chr  "TX" "OH" "CA" "GA" ...
##  $ pop        : int  113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
##  $ lat        : num  32.5 41.1 37.8 31.6 42.7 ...
##  $ long       : num  -99.7 -81.5 -122.3 -84.2 -73.8 ...
##  $ capital    : int  0 0 0 0 2 0 0 0 0 0 ...
trCity <- usmap::usmap_transform(select(maps::us.cities, long, lat, everything())) %>% 
    mutate(useName=str_replace_all(str_sub(name, 1, -4), " ", "\n"))
str(trCity)
## 'data.frame':    1005 obs. of  9 variables:
##  $ long       : num  -99.7 -81.5 -122.3 -84.2 -73.8 ...
##  $ lat        : num  32.5 41.1 37.8 31.6 42.7 ...
##  $ name       : chr  "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
##  $ country.etc: chr  "TX" "OH" "CA" "GA" ...
##  $ pop        : int  113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
##  $ capital    : int  0 0 0 0 2 0 0 0 0 0 ...
##  $ long.1     : num  24544 1533716 -1931842 1498524 2096820 ...
##  $ lat.1      : num  -1392670 -262400 -543195 -1350290 82431 ...
##  $ useName    : chr  "Abilene" "Akron" "Alameda" "Albany" ...
# Define a key region for plotting
rgnPlot <- c(usmap::.west_south_central, usmap::.east_south_central)
popFilter <- 100000

# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("South Central Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Plot the full nation for cities of 250k +
rgnPlot <- c(usmap::.midwest_region, usmap::.northeast_region, 
             usmap::.south_region, usmap::.west_region
             )
popFilter <- 250000

# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("US Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Plot cities by name for the Four Corners region
rgnPlot <- c("UT", "CO", "NM", "AZ")
popFilter <- 125000

# Add cities as a layer to the state map (points)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               )

# Add cities as a layer to the state map (text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_text(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop, label=useName)
               )

popFilter <- 50000
popFilter2 <- 250000

# Add cities as a layer to the state map (points and text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") + 
    labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) + 
    geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop), alpha=0.5
               ) + 
    geom_text(data=filter(trCity, pop >= popFilter2, country.etc %in% rgnPlot), 
               aes(x=long.1, y=lat.1, size=pop, label=useName), color="red", show.legend=FALSE
               )

Example #6: Custom coloring geographies

Using scale_fill_manual(), custom colors can be created by geography.

Example code includes:

# Basic county population map with continuous colors
usmap::countypop %>% 
    filter(abbr %in% c("OH", "IN", "KY")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n")) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pop") +
    scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (000)") + 
    labs(title="Indiana, Ohio, and Kentucky - Population by County")

# Custom county population map with colors - red for Indiana, blue for Kentucky, grey for Ohio
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), popLabels)

usmap::countypop %>% 
    filter(abbr %in% c("OH", "KY", "IN")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pop, popBucket), 
           pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(popBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) + 
    labs(title="Indiana, Ohio, and Kentucky - Population by County") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3))

# Custom county poverty rate map with colors - red for Indiana, blue for Kentucky, grey for Ohio
povBucket <- c(0, 15, 30)
povLabels <- sapply(1:(length(povBucket)-1), FUN=function(x){paste0(povBucket[x], "-", povBucket[x+1])})
povLabels <- c(povLabels, paste0(povBucket[length(povBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), povLabels)

usmap::countypov %>% 
    filter(abbr %in% c("OH", "KY", "IN")) %>% 
    mutate(name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pct_pov_2014, povBucket), 
           pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(povBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Poverty Rate (%)", labels=guideLabels) + 
    labs(title="Indiana, Ohio, and Kentucky - Poverty Rate by County") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3))

Example #7: Custom labeling of key geographies

The above techniques can be combined for custom labeling of key geographies.

Example code includes:

# Basic state population data
stateData <- usmap::statepop %>% 
    mutate(pop=round(pop_2015/1000000, 1), 
           name=ifelse(full=="District of Columbia", "DC", str_replace(full, " ", "\n")), 
           lab=paste0(abbr, "\n(", pop, ")\n")
           )

# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "states", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
                            )

# Grab centroids for top 5 states
top5State <- stateData %>%
    top_n(5, pop) %>% 
    left_join(select(stCenter, x, y, fips))
## Joining, by = "fips"
# Plot state population with continuous colors and custom labels
stateData %>% 
    usmap::plot_usmap(regions="states", data=., values="pop") +
    scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (millions)") + 
    labs(title="Population by State", subtitle="Top 5 in millions") + 
    geom_text(data=top5State, aes(x=x, y=y, label=lab), color="white", size=4, fontface="bold")

# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata", 
                                        paste0("us_", "counties", "_centroids.csv"), package = "usmap"
                                        ),
                            colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
                            )

# Custom county population map with colors - red for Wisconsin, blue for Michigan
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("MI", "WI"), each=3), popLabels)

# Grab county data for counties exceeding the top popBucket
ctyData <- usmap::countypop %>%
    filter(abbr %in% c("MI", "WI")) %>%
    mutate(pop=round(pop_2015/1000, 0), 
           name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           lab=paste0(name, "\n(", pop, ")\n")
           )

topCounty <- ctyData %>%
    filter(pop >= max(popBucket)) %>%
    left_join(select(ctCenter, x, y, fips))
## Joining, by = "fips"
# Create county population map
usmap::countypop %>% 
    filter(abbr %in% c("MI", "WI")) %>% 
    mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"), 
           pBucket=findInterval(pop, popBucket), 
           pColor=rgb(abbr=="WI", 0, abbr=="MI", pBucket/length(popBucket))
           ) %>%
    usmap::plot_usmap(regions="counties", include=c("WI", "MI"), data=., values="pColor") +
    scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) + 
    geom_text(data=topCounty, aes(x=x, y=y, label=lab), size=3, fontface="bold", color="white") +
    labs(title="Michigan and Wisconsin - Population by County", subtitle="Labelled Pop. (000) for 500k+") + 
    theme(legend.position = "bottom") + 
    guides(fill=guide_legend(nrow=3)) + 
    theme(panel.background=element_rect(color="black", fill="lightgrey"))

Example #8: Plotting Weather Data (Temperatures and Dew Points)

The ggridges package has weather data for Lincoln, NE in the data file ‘lincoln_weather’. The data are captured once per day for 366 days of 2016. Simple plots can be made of the average temperatures and dew points.

Example code includes:

data(lincoln_weather, package="ggridges")
str(lincoln_weather, give.attr=FALSE)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  24 variables:
##  $ CST                         : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ Max Temperature [F]         : int  37 41 37 30 38 34 33 28 22 31 ...
##  $ Mean Temperature [F]        : int  24 23 23 17 29 33 30 25 9 11 ...
##  $ Min Temperature [F]         : int  11 5 8 4 19 32 27 22 -4 -9 ...
##  $ Max Dew Point [F]           : int  19 22 23 24 29 33 32 25 17 20 ...
##  $ Mean Dew Point [F]          : int  13 14 15 13 25 32 30 22 4 5 ...
##  $ Min Dewpoint [F]            : int  8 4 8 2 19 29 25 18 -8 -13 ...
##  $ Max Humidity                : int  88 100 92 92 96 100 100 92 87 87 ...
##  $ Mean Humidity               : int  68 72 73 82 83 91 96 85 77 75 ...
##  $ Min Humidity                : int  47 44 54 72 70 82 92 78 67 63 ...
##  $ Max Sea Level Pressure [In] : num  30.5 30.4 30.5 30.5 30.2 ...
##  $ Mean Sea Level Pressure [In]: num  30.4 30.3 30.4 30.4 30.1 ...
##  $ Min Sea Level Pressure [In] : num  30.3 30.2 30.3 30.2 30 ...
##  $ Max Visibility [Miles]      : int  10 10 10 10 10 10 9 10 10 10 ...
##  $ Mean Visibility [Miles]     : int  10 10 10 9 8 4 3 6 9 10 ...
##  $ Min Visibility [Miles]      : int  10 10 10 6 5 0 0 2 5 10 ...
##  $ Max Wind Speed [MPH]        : int  20 15 13 17 22 16 16 25 25 10 ...
##  $ Mean Wind Speed[MPH]        : int  9 6 5 7 13 7 7 16 14 5 ...
##  $ Max Gust Speed [MPH]        : int  23 18 14 23 28 21 21 32 28 12 ...
##  $ Precipitation [In]          : chr  "0" "0" "0" "0" ...
##  $ CloudCover                  : int  0 0 0 1 4 8 8 8 5 0 ...
##  $ Events                      : chr  NA NA NA NA ...
##  $ WindDir [Degrees]           : int  280 312 330 155 178 167 7 338 340 268 ...
##  $ Month                       : Factor w/ 12 levels "December","November",..: 12 12 12 12 12 12 12 12 12 12 ...
# Extract temperature and dew point data
tdData <- lincoln_weather %>%
    select(CST, maxT=`Max Temperature [F]`, minT=`Min Temperature [F]`, meanT=`Mean Temperature [F]`, 
           maxD=`Max Dew Point [F]`, minD=`Min Dewpoint [F]`, meanD=`Mean Dew Point [F]`
           ) %>%
    mutate(date=as.Date(CST))
str(tdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  8 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ maxT : int  37 41 37 30 38 34 33 28 22 31 ...
##  $ minT : int  11 5 8 4 19 32 27 22 -4 -9 ...
##  $ meanT: int  24 23 23 17 29 33 30 25 9 11 ...
##  $ maxD : int  19 22 23 24 29 33 32 25 17 20 ...
##  $ minD : int  8 4 8 2 19 29 25 18 -8 -13 ...
##  $ meanD: int  13 14 15 13 25 32 30 22 4 5 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Plot temperatures by day
tdData %>%
    select(date, maxT, meanT, minT) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=date, y=value, group=name)) + 
    geom_line(aes(color=name))

# Plot dew points by day
tdData %>%
    select(date, maxD, meanD, minD) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=date, y=value, group=name)) + 
    geom_line(aes(color=name))

library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
# Create an XTS for temperature data
tdXTS <- xts(select(tdData, minT, meanT, maxT), order.by=tdData$date)

# Create and plot weekly and monthly averages
tdXTS %>%
    apply.weekly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Weekly Temperature Average (Lincoln, NE 2016)")

tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Monthly Temperature Average (Lincoln, NE 2016)")

# Create an XTS for dew-point data
tdXTS <- xts(select(tdData, minD, meanD, maxD), order.by=tdData$date)

# Create and plot weekly and monthly averages
tdXTS %>%
    apply.weekly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Weekly Dew Point Average (Lincoln, NE 2016)")

tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>%
    plot(main="Monthly Dew Point Average (Lincoln, NE 2016)")

Example #9: Combining xts and ggplot2

The xts package is good for working with time series data, while ggplot2 is strong for customizing plots. The packages can be combined in using the weather data.

Example code includes:

# Create an XTS for temperature and dewpoint data
tdXTS <- xts(select(tdData, minT, meanT, maxT, minD, meanD, maxD), order.by=tdData$date)

# Use xts for monthly average and ggplot2 for plotting
basePlot <- tdXTS %>%
    apply.monthly(FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL) %>% 
    ggplot(aes(x=date-lubridate::days(15))) + 
    geom_ribbon(aes(ymin=minT, ymax=maxT), color="lightblue", fill="lightblue", alpha=0.5) +
    geom_line(aes(y=meanT), color="blue", lwd=1) + 
    labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)", 
         subtitle="Monthly Avg. Temperature (F)"
         )
basePlot

# Add labelling for the three elements
hiMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[3]
loMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[9]
muMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[6]
hiPoint <- c(60, 75)
loPoint <- c(45, 25)
muPoint <- c(72.5, 45)

labFrame <- data.frame(x=c(hiMonth, loMonth, muMonth), 
                       yend=c(hiPoint[1], loPoint[1], muPoint[1]),
                       y=c(hiPoint[2], loPoint[2], muPoint[2]), 
                       text=c("Avg. Monthly High", "Avg. Monthly Low", "Avg. Monthly Mean")) %>%
    mutate(xend=x)

basePlot + 
    geom_segment(data=labFrame, aes(x=x, y=y, xend=xend, yend=yend), arrow=arrow()) + 
    geom_text(data=labFrame, aes(x=x, y=y+c(5, -5, -5), label=text), fontface="bold", size=4)

# Can also create and plot a custom rolling average
baseData <- tdXTS %>%
    data.frame(date=index(.), row.names=NULL)

base7Day <- rollapply(tdXTS, 7, FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL)

base30Day <- rollapply(tdXTS, 30, FUN=mean, na.rm=TRUE) %>% 
    data.frame(date=index(.), row.names=NULL)

plotFrame <- bind_rows(baseData, base7Day, base30Day, .id="rolling") %>%
    mutate(rollLabel=case_when(rolling==1 ~ "Daily", 
                               rolling==2 ~ "7 Day Rolling", 
                               rolling==3 ~ "30 Day Rolling",
                               TRUE ~ "ERROR"
                               )
           )
            
plotFrame %>%
    ggplot(aes(x=date)) + 
    geom_line(aes(y=meanT, color=rollLabel, group=rollLabel), lwd=1) + 
    labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)", 
         subtitle="Daily Avg. Temperature (F)"
         )
## Warning: Removed 35 rows containing missing values (geom_path).

Example #10: Plotting Weather Data (Humidity)

Humidity data are also available in the lincoln_weather dataset. There is a relationship between temperature, dewpoint, and humidity.

Example code includes:

htdData <- lincoln_weather %>%
    select(CST, meanT=`Mean Temperature [F]`, meanD=`Mean Dew Point [F]`, meanH=`Mean Humidity`) %>%
    mutate(date=as.Date(CST), month=lubridate::month(date))
str(htdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  6 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ meanT: int  24 23 23 17 29 33 30 25 9 11 ...
##  $ meanD: int  13 14 15 13 25 32 30 22 4 5 ...
##  $ meanH: int  68 72 73 82 83 91 96 85 77 75 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
##  $ month: num  1 1 1 1 1 1 1 1 1 1 ...
# Histogram for average humidity
htdData %>%
    ggplot(aes(x=meanH)) + 
    geom_histogram() + 
    labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

htdData %>%
    filter((meanH < 25) | is.na(meanH))
## # A tibble: 2 x 6
##   CST       meanT meanD meanH date       month
##   <chr>     <int> <int> <int> <date>     <dbl>
## 1 2016-2-27    50    21     0 2016-02-27     2
## 2 2016-2-28    44    NA    NA 2016-02-28     2
htdData <- htdData %>%
    filter(!((meanH < 25) | is.na(meanH)))

# Updated Histogram for average humidity
htdData %>%
    ggplot(aes(x=meanH)) + 
    geom_histogram() + 
    labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram for dewpoint depression (T - D)
htdData %>%
    ggplot(aes(x=meanT-meanD)) + 
    geom_histogram() + 
    labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

htdData %>% 
    filter(meanD >= meanT)
## # A tibble: 2 x 6
##   CST        meanT meanD meanH date       month
##   <chr>      <int> <int> <int> <date>     <dbl>
## 1 2016-1-7      30    30    96 2016-01-07     1
## 2 2016-11-27    37    39    85 2016-11-27    11
htdData <- htdData %>%
    filter(meanT >= meanD)

# Updated Histogram for dewpoint depression (T - D)
htdData %>%
    ggplot(aes(x=meanT-meanD, y=..density..)) + 
    geom_histogram(binwidth=1) + 
    geom_density(color="red") + 
    labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Proportion")

# Average humidity by month
htdData %>%
    group_by(month) %>%
    summarize(meanH=mean(meanH, na.rm=TRUE)) %>%
    ggplot(aes(x=as.factor(month), y=meanH)) + 
    geom_col() + 
    labs(title="Average Humidity by Month", subtitle="Lincoln, NE (2016)", x="Month", y="Mean Humidity (%)")

# Relationship between temperature and dewpoint
htdData %>%
    ggplot(aes(x=meanD, y=meanT)) + 
    geom_point() + 
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Mean Temperature (F)"
         )

# Relationship between dewpoint depression and humidity
htdData %>%
    mutate(dpD=meanT-meanD) %>%
    ggplot(aes(x=dpD, y=meanH)) + 
    geom_point() + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint Depression (F)", y="Mean Humidity (%)"
         )

# Relationship between temperature and dewpoint and humidity
humInts <- c(0, 50, 60, 70, 80)
humLabel <- sapply(1:(length(humInts)-1), FUN=function(x) { paste0(humInts[x], "-", humInts[x+1]) })
humLabel <- c(humLabel, paste0(humInts[length(humInts)], "+"))

htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanD, y=meanT, color=humBin)) + 
    geom_point() + 
    geom_smooth(se=FALSE) +
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Mean Temperature (F)"
         )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Expressed using dewpoint depression vs. dewpoint
htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanD, y=meanT-meanD, color=humBin)) + 
    geom_point() + 
    geom_smooth() +
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Dewpoint (F)", y="Dewpoint Depression (F)"
         )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Expressed using dewpoint depression vs. temperature
htdData %>%
    mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
    ggplot(aes(x=meanT, y=meanT-meanD, color=humBin)) + 
    geom_point() + 
    geom_smooth(se=FALSE, method="lm") +
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Mean Temperature (F)", y="Dewpoint Depression (F)"
         )

# Linear regression for temperature, dewpoint, and humidity
htdReg <- htdData %>%
    mutate(dpD=meanT-meanD) %>%
    lm(meanH ~ meanT + dpD, data=.)
summary(htdReg)
## 
## Call:
## lm(formula = meanH ~ meanT + dpD, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.3569  -2.5472  -0.1894   2.7285  14.4934 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 82.89890    0.83037  99.833  < 2e-16 ***
## meanT        0.09771    0.01403   6.965 1.57e-11 ***
## dpD         -1.79530    0.04811 -37.317  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.869 on 360 degrees of freedom
## Multiple R-squared:  0.796,  Adjusted R-squared:  0.7949 
## F-statistic: 702.4 on 2 and 360 DF,  p-value: < 2.2e-16
htdData %>%
    mutate(dpD=meanT-meanD) %>%
    mutate(predH=predict(htdReg, newdata=.)) %>%
    ggplot(aes(x=predH, y=meanH)) + 
    geom_point() + 
    geom_abline(slope=1, intercept=0) + 
    labs(title="Daily Averages", subtitle="Lincoln, NE (2016)", 
         x="Predicted Humidity (%)", y="Actual Humidity (%)"
         )

Example #11: Plotting Weather Data (Wind)

Wind data (speed, gust, direction) are also available in the lincoln_weather dataset..

Example code includes:

# Extract wind data
wdData <- lincoln_weather %>%
    select(CST, maxW=`Max Wind Speed [MPH]`, maxG=`Max Gust Speed [MPH]`, meanW=`Mean Wind Speed[MPH]`, 
           dirW=`WindDir [Degrees]`
           ) %>%
    mutate(date=as.Date(CST))
str(wdData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  6 variables:
##  $ CST  : chr  "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
##  $ maxW : int  20 15 13 17 22 16 16 25 25 10 ...
##  $ maxG : int  23 18 14 23 28 21 21 32 28 12 ...
##  $ meanW: int  9 6 5 7 13 7 7 16 14 5 ...
##  $ dirW : int  280 312 330 155 178 167 7 338 340 268 ...
##  $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Manage missing data
wdData[!complete.cases(wdData), ]
## # A tibble: 6 x 6
##   CST         maxW  maxG meanW  dirW date      
##   <chr>      <int> <int> <int> <int> <date>    
## 1 2016-2-6      18    NA     5   241 2016-02-06
## 2 2016-2-28     36    45    18    NA 2016-02-28
## 3 2016-5-31     17    NA    11   319 2016-05-31
## 4 2016-6-18     NA    NA    NA    -1 2016-06-18
## 5 2016-6-19     NA    NA    NA    -1 2016-06-19
## 6 2016-12-19    17    NA     9   209 2016-12-19
wdData <- wdData %>%
    filter(dirW != -1, !is.na(dirW)) %>%
    mutate(maxG=ifelse(is.na(maxG), maxW, maxG))
summary(wdData)
##      CST                 maxW             maxG            meanW      
##  Length:363         Min.   :  8.00   Min.   : 10.00   Min.   : 0.00  
##  Class :character   1st Qu.: 16.00   1st Qu.: 21.00   1st Qu.: 6.00  
##  Mode  :character   Median : 21.00   Median : 26.00   Median : 8.00  
##                     Mean   : 21.71   Mean   : 27.93   Mean   : 9.11  
##                     3rd Qu.: 25.00   3rd Qu.: 33.00   3rd Qu.:11.00  
##                     Max.   :131.00   Max.   :143.00   Max.   :27.00  
##       dirW            date           
##  Min.   :  1.0   Min.   :2016-01-01  
##  1st Qu.:143.5   1st Qu.:2016-04-01  
##  Median :193.0   Median :2016-07-03  
##  Mean   :209.8   Mean   :2016-07-01  
##  3rd Qu.:310.0   3rd Qu.:2016-10-01  
##  Max.   :358.0   Max.   :2016-12-31
# Manage very high wind data
wdData[wdData$maxG >= 60, ]
## # A tibble: 3 x 6
##   CST         maxW  maxG meanW  dirW date      
##   <chr>      <int> <int> <int> <int> <date>    
## 1 2016-3-23     45    61    25    20 2016-03-23
## 2 2016-6-17    131   143    20   170 2016-06-17
## 3 2016-12-25    51    64    19   139 2016-12-25
wdData <- wdData %>%
    filter(maxG <= 80)
summary(wdData)
##      CST                 maxW           maxG           meanW      
##  Length:362         Min.   : 8.0   Min.   :10.00   Min.   : 0.00  
##  Class :character   1st Qu.:16.0   1st Qu.:21.00   1st Qu.: 6.00  
##  Mode  :character   Median :21.0   Median :26.00   Median : 8.00  
##                     Mean   :21.4   Mean   :27.61   Mean   : 9.08  
##                     3rd Qu.:25.0   3rd Qu.:33.00   3rd Qu.:11.00  
##                     Max.   :51.0   Max.   :64.00   Max.   :27.00  
##       dirW            date           
##  Min.   :  1.0   Min.   :2016-01-01  
##  1st Qu.:143.2   1st Qu.:2016-04-01  
##  Median :193.0   Median :2016-07-03  
##  Mean   :209.9   Mean   :2016-07-01  
##  3rd Qu.:310.0   3rd Qu.:2016-10-01  
##  Max.   :358.0   Max.   :2016-12-31
# Density of wind speeds
wdData %>%
    select(date, meanW, maxW, maxG) %>%
    pivot_longer(-date) %>%
    ggplot(aes(x=value, fill=name)) + 
    geom_density(alpha=0.5) + 
    scale_fill_discrete(name="Wind Speed [MPH]", labels=c("Max Gust", "Max", "Mean")) + 
    labs(title="Lincoln, NE (2016) Wind Speeds", y="Density", x="Wind Speed [MPH]")

# Density of wind direction
wdData %>%
    select(date, dirW) %>%
    ggplot(aes(x=dirW)) + 
    geom_density(alpha=0.5, fill="blue") + 
    labs(title="Winds are mainly from the S and NW", subtitle="Lincoln, NE (2016)", 
         y="Density", x="Wind Direction"
         )

# Wind speed and direction
wdData %>% 
    ggplot(aes(x=meanW, y=dirW)) + 
    geom_point(alpha=0.25) + 
    coord_polar(theta="y") + 
    labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Mean Wind Speed", x="Mean Wind Speed [MPH]") + 
    scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
    scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
    geom_point(aes(x=0, y=0), color="red", size=2)

# Wind speed and direction as factors
windDirs <- c("N", "NE", "E", "SE", "S", "SW", "W", "NW")
windSpeeds <- c(0, 5, 10, 15)
windLabels <- sapply(1:(length(windSpeeds)-1), FUN=function(x){ paste0(windSpeeds[x], "-", windSpeeds[x+1]) })
windLabels <- c(windLabels, paste0(windSpeeds[length(windSpeeds)], "+"))
wdData <- wdData %>% 
    mutate(wd=factor(floor(((dirW+22.5)/45) %% 8), levels=0:7, labels=windDirs), 
           ws=factor(findInterval(meanW, windSpeeds), levels=length(windSpeeds):1, labels=rev(windLabels))
           )

# Summary of interaction between wind speed and wind direction 
wdData %>%
    group_by(wd) %>% 
    summarize(n=n(), avgMean=mean(meanW), avgMax=mean(maxW), avgGust=mean(maxG))
## # A tibble: 8 x 5
##   wd        n avgMean avgMax avgGust
##   <fct> <int>   <dbl>  <dbl>   <dbl>
## 1 N        51   10.1    21.4    27.7
## 2 NE       18    8.17   19.4    24.8
## 3 E        16    7.44   18.9    24.6
## 4 SE       69    8.01   19.6    25.5
## 5 S        73    8.89   20.6    26.8
## 6 SW       26    6.96   20.3    26.2
## 7 W        29    8.10   20.4    26.5
## 8 NW       80   11.1    25.4    32.2
table(wdData$wd, wdData$ws)
##     
##      15+ 10-15 5-10 0-5
##   N    8    14   25   4
##   NE   1     7    6   4
##   E    0     3   12   1
##   SE   3    15   42   9
##   S    5    23   43   2
##   SW   0     6   15   5
##   W    3     4   18   4
##   NW  17    31   28   4
# Graph of wind speed and wind direction
wdData %>%
    ggplot(aes(x=wd, fill=ws)) + 
    geom_bar() + 
    scale_fill_discrete(name="Wind Speed [MPH]") + 
    labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction")

# Graph of wind speed and wind direction (polar coordinates)
wdData %>%
    ggplot(aes(x=wd, fill=ws)) + 
    geom_bar() + 
    scale_fill_discrete(name="Wind Speed [MPH]") + 
    labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction") + 
    coord_polar(start=-0.4)

Example #12: Archived granular weather Data (METAR)

Iowa State has a great database of archived weather data, including the historical METAR data (meteorological aerodrome report) for a number of reporting stations.

METAR include information on visibility, wind, temperature, dew point, precipitation, clouds, barometric pressure, and other features that may impact safe aviation.

The data for station KLNK (Lincoln, NE airport) was saved as a CSV from Iowa State

Some processing is required before using the METAR data:

Example code includes:

# Load METAR data
klnk <- readr::read_csv("./RInputFiles/metar_klnk_2016.txt", na=c("", "NA", "M"))
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
str(klnk, give.attr=FALSE)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
# Filter to only data that ends with times ending in 54Z
metarKLNK <- klnk %>%
    filter(str_detect(metar, "54Z"))
dim(metarKLNK)
## [1] 8813   29
# There should be 24*368=8832 records, so there are a handful (19) of missing METAR observations
minDate <- min(metarKLNK$valid)
expDate <- minDate + lubridate::hours(0:(24*368 - 1))

# Observations expected but not recorded
as.POSIXct(setdiff(expDate, metarKLNK$valid), origin="1970-01-01", tz="UTC")
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
# Observations recorded but not expected
setdiff(metarKLNK$valid, expDate)
## numeric(0)
# Confirmation of uniqueness
length(unique(metarKLNK$valid)) == length(metarKLNK$valid)
## [1] TRUE
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)

mtxWind <- metarKLNK %>%
    pull(metar) %>%
    str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
head(mtxWind)
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA
table(mtxWind[, 2], useNA="ifany")
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19
table(mtxWind[, 3], useNA="ifany")
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19
table(mtxWind[, 4], useNA="ifany")
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777
# Verify that winds not captured are in fact missing from the METAR
metarKLNK[which(is.na(mtxWind[, 2])), "metar"]
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
metarKLNK <- metarKLNK %>%
    mutate(dirW=mtxWind[, 2], 
           spdW=as.numeric(mtxWind[, 3]), 
           gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
           )

# Plot for the wind direction
metarKLNK %>%
    ggplot(aes(x=dirW)) + 
    geom_bar() + 
    labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)", 
         y="# Hourly Observations", x="Wind Direction"
         )

# Plot for the minimum, average, and maximum wind speed by wind direction
# Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
metarKLNK %>%
    filter(!is.na(dirW)) %>%
    group_by(dirW) %>%
    summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
    ggplot(aes(x=dirW)) + 
    geom_point(aes(y=meanWind), color="red", size=2) + 
    geom_errorbar(aes(ymin=minWind, ymax=maxWind)) + 
    labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)", 
         y="Wind Speed [KT]", x="Wind Direction"
         )

# Plot for the wind speed
# Roughly 10% of the time, there is no wind in Lincoln
metarKLNK %>%
    ggplot(aes(x=spdW)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    labs(title="Roughly 10% of wind speeds in Lincoln, NE measure 0 Knots", subtitle="KLNK METAR (2016)", 
         y="% Hourly Observations", x="Wind Speed {KT]"
         )
## Warning: Removed 19 rows containing non-finite values (stat_count).

metarKLNK %>% 
    filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
    mutate(dirW=as.numeric(dirW)) %>%
    group_by(dirW, spdW) %>%
    summarize(n=n()) %>%
    ggplot(aes(x=spdW, y=dirW)) + 
    geom_point(alpha=0.1, aes(size=n)) + 
    coord_polar(theta="y") + 
    labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Wind Speed", x="Wind Speed [KT]") + 
    scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
    scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
    geom_point(aes(x=0, y=0), color="red", size=2)
## Warning: Removed 4 rows containing missing values (geom_point).

Example #13: Extracting Key Elements from METAR

A properly formatted METAR includes the following information in order, though with variable amounts of other information in between.

dddd54Z ddddd[Gdd]KT dSM [M]dd/[M]dd Adddd RMK SLPddd Tdddddddd

  • dddd54Z is the two-digit date and four-digit Zulu time (KLNK METAR are taken at 54 minutes past the hour)
  • ddddd[Gdd]KT is the three-digit wind direction (can be VRB), two digit wind speed in knots, and sometimes the two digit maximum gust in knots
  • dSM is the visibility in statute miles. This is somewhat tricky in that d can be any of 0-10 but can also be 1/4, 1/2, 3/4, 1 1/4, 1 1/2, 1 3/4, 2 1/2
  • [M]dd/[M]dd is the temperature in celsius and dewpoint in celsius. M means negative
  • Adddd is the four-digit altimeter reading
  • RMK notes that the remarks are beginning
  • SLPddd notes the three-digit sea-level pressure
  • Tdddddddd notes the four digit temperature in Celsius and the four digit dewpoint in celsius. If it begins with 1 it is negative. The fourth digit is the decimal. It will always be a Celsius reading that best corresponds to integer degrees of Fahrenheit

Example code includes:

metAll <- metarKLNK %>%
    pull(metar)

# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"

# Find the number of matching elements
str_detect(metAll, pattern=valMet) %>% table()
## .
## FALSE  TRUE 
##    23  8790
# The strings that do not match have errors in the raw data (typically, missing wind speed)
metAll[!str_detect(metAll, pattern=valMet)]
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"
# A matrix of string matches can be obtained
mtxParse <- str_match(metAll, pattern=valMet)
head(mtxParse)
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
# Create a data frame
dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE)
names(dfParse) <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
                    "TempC", "DewC", "Altimeter", "SLP", "FahrC"
                    )
dfParse <- tibble::as_tibble(dfParse)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  11 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : chr  "05" "00" "00" "03" ...
##  $ WindGust  : chr  NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: chr  "10SM" "10SM" "10SM" "10SM" ...
##  $ TempC     : chr  "M03" "M03" "M03" "M03" ...
##  $ DewC      : chr  "M07" "M07" "M07" "M06" ...
##  $ Altimeter : chr  "A3029" "A3030" "A3030" "A3031" ...
##  $ SLP       : chr  "SLP275" "SLP277" "SLP277" "SLP281" ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
# Convert to numeric where appropriate
dfParse <- dfParse %>%
    mutate(WindSpeed = as.integer(WindSpeed), 
           WindGust = as.numeric(WindGust), 
           Visibility = as.numeric(str_replace(Visibility, "SM", "")),
           TempC = as.integer(str_replace(TempC, "M", "-")), 
           DewC = as.integer(str_replace(DewC, "M", "-")), 
           Altimeter = as.integer(str_replace(Altimeter, "A", "")), 
           SLP = as.integer(str_replace(SLP, "SLP", "")), 
           TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10, 
           DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
           )
## Warning: NAs introduced by coercion
# Investigate the data
set.seed(2003211416)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  13 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
head(dfParse)
## # A tibble: 6 x 13
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
tail(dfParse)
## # A tibble: 6 x 13
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
dfParse %>% 
    sample_n(20)
## # A tibble: 20 x 13
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
# Check for NA values
colSums(is.na(dfParse))
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      TempF       DewF 
##         23         23         23         23         23         23
# Plot of counts by key metric
keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
               "DewC", "Altimeter", "SLP", "TempF", "DewF"
               )

for (x in keyMetric) {
    p <- dfParse %>%
        group_by_at(vars(all_of(x))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=x, y="n")) + 
        geom_col() + 
        labs(title=x, y="Count")
    print(p)
}

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit

# Correct for visibility
# Areas that have \\d \\d/\\dSM
sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))

valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
valSM1 <- str_replace(valSM1, "SM", "")
valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))

valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
valSM2 <- as.integer(str_sub(valSM2, 2, 2))

dfParse[sm1, "Visibility"] <- valSM1
dfParse[sm2, "Visibility"] <- dfParse[sm2, "Visibility"] + valSM2

dfParse %>% 
    count(Visibility)
## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
# Correct for wind gusts
gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
valGust <- as.integer(str_sub(valGust, 7, 8))

dfParse[gustCheck, "WindGust"] <- valGust

dfParse %>% 
    count(WindGust) %>% 
    as.data.frame
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
# Correct for SLP
dfParse <- dfParse %>%
    mutate(modSLP=ifelse(dfParse$SLP < 500, 1000 + dfParse$SLP/10, 900 + dfParse$SLP/10))

dfParse %>%
    group_by(SLP, modSLP) %>%
    summarize(n=n()) %>%
    ggplot(aes(x=SLP, y=modSLP, size=n)) + 
    geom_point(alpha=0.3)
## Warning: Removed 1 rows containing missing values (geom_point).

# Check updated plots
keyMetric <- c("WindGust", "Visibility", "modSLP")
for (x in keyMetric) {
    p <- dfParse %>%
        group_by_at(vars(all_of(x))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=x, y="n")) + 
        geom_col() + 
        labs(title=x, y="Count")
    print(p)
}
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

Example #14: Relationships Between METAR Variables

Many of the METAR variables are correlated/associated to one another.

Example code includes:

# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")

# Add the date back to the file (should edit the above instead)
dfParse <- dfParse %>%
    mutate(month=lubridate::month(metarKLNK$valid))
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ month     : num  12 12 12 12 12 12 12 12 12 12 ...
# Keep only complete cases and find correlations
mtxCorr <- dfParse %>%
    mutate(month=lubridate::month(metarKLNK$valid)) %>%
    select_at(vars(all_of(coreNum))) %>%
    filter(complete.cases(.)) %>%
    cor()

# Print the correlations and show a heatmap
mtxCorr %>%
    round(2)
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00
corrplot::corrplot(mtxCorr, method="color", title="Lincoln, NE Hourly Weather Correlations (2016)")

# Create a function for plotting two variables against each other
plotNumCor <- function(var1, var2, title=NULL) {
    if (is.null(title)) 
        { title <- paste0("Lincoln, NE (2016) Hourly Correlations of ", var1, " and ", var2) }
    p <- dfParse %>%
        group_by_at(vars(all_of(c(var1, var2)))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=var1, y=var2)) + 
        geom_point(alpha=0.5, aes_string(size="n")) + 
        geom_smooth(method="lm", aes_string(weight="n")) + 
        labs(x=var1, y=var2, title=title)
    print(p)
}

# The three linear or almost linear relationships
plotNumCor("TempC", "TempF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("DewC", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("Altimeter", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Strongly and positively related
plotNumCor("TempF", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Moderately negatively correlated
plotNumCor("TempF", "Altimeter")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("TempF", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

plotNumCor("Altimeter", "WindSpeed")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Predict modSLP from Altimeter
lmSLP1 <- lm(modSLP ~ Altimeter, data=dfParse)
lmSLP2 <- lm(modSLP ~ Altimeter + TempF, data=dfParse)
summary(lmSLP1)
## 
## Call:
## lm(formula = modSLP ~ Altimeter, data = dfParse)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
summary(lmSLP2)
## 
## Call:
## lm(formula = modSLP ~ Altimeter + TempF, data = dfParse)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
# Plot predictions vs. actual (model 1)
dfParse %>%
    filter(!is.na(modSLP)) %>%
    mutate(pred1=predict(lmSLP1)) %>%
    count(modSLP, pred1) %>%
    ggplot(aes(x=modSLP, y=pred1)) + 
    geom_point(alpha=0.25, aes(size=n)) + 
    geom_smooth(method="lm", aes(weight=n)) + 
    labs(title="Predicted vs. Actual Sea Level Pressure - Altitude Only as Predictor", 
         subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
         )

# Plot predictions vs. actual (model 2)
dfParse %>%
    filter(!is.na(modSLP)) %>%
    mutate(pred2=predict(lmSLP2)) %>%
    count(modSLP, pred2) %>%
    ggplot(aes(x=modSLP, y=pred2)) + 
    geom_point(alpha=0.25, aes(size=n)) + 
    geom_smooth(method="lm", aes(weight=n)) + 
    labs(title="Predicted vs. Actual Sea Level Pressure - Altitude and Temperature as Predictor", 
         subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
         )

Example #15: Extracting Cloud Data from METAR

Cloud data is also included in the METAR, with the type of clouds being described as:

  • CLR - there are no clouds below 12,000 feet
  • VVddd - there is a vertical visibility of ddd hundred feet (cannot tell where the clouds are above that)
  • FEWddd - there are clouds with bases at ddd feet, and they obscure 25% or less of the sky
  • SCTddd - there are clouds with bases at ddd feet, and they obscure 25%-50% of the sky
  • BKNddd - there are clouds with bases at ddd feet, and they obscure 50%-99% of the sky
  • OVCddd - there is a full overcast with base at ddd feet

The ceiling is considered the lowest height that is measured as any of OVC, BKN, or VV.

Example code includes:

# Extract the CLR records
mtxCLR <- str_extract_all(metarKLNK$metar, pattern=" CLR ", simplify=TRUE)
if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)

# Extract the VV records
mtxVV <- str_extract_all(metarKLNK$metar, pattern="VV(\\d{3})", simplify=TRUE)
if (dim(mtxVV)[[2]] != 1) { stop("Extracted 2+ VV from some METAR; investigate") }
isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)

# Extract the FEW records
mtxFEW <- str_extract_all(metarKLNK$metar, pattern="FEW(\\d{3})", simplify=TRUE)
numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )

# Extract the SCT records
mtxSCT <- str_extract_all(metarKLNK$metar, pattern="SCT(\\d{3})", simplify=TRUE)
numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )

# Extract the BKN records
mtxBKN <- str_extract_all(metarKLNK$metar, pattern="BKN(\\d{3})", simplify=TRUE)
numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )

# Extract the OVC records
mtxOVC <- str_extract_all(metarKLNK$metar, pattern="OVC(\\d{3})", simplify=TRUE)
numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )

# Summarize as a data frame
tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW, 
                            numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
                            )

# Get the counts
# As expected, if isCLR then nothing else, and if isVV then nothing else
tblClouds %>% 
    count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
    as.data.frame()
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
# Investigate the problem data
metarKLNK$metar[rowSums(tblClouds, na.rm=TRUE)==0]
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"
# Get the counts of most obscuration
tblClouds %>%
    filter(rowSums(., na.rm=TRUE) > 0) %>%
    mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                  numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                  TRUE ~ "Error"
                                  ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                  )
           ) %>%
    ggplot(aes(x=wType, y=..count../sum(..count..))) + 
    geom_bar() + 
    labs(title="Highest Obscuration by Cloud - Lincoln, NE (2016)", x="Cloud Type", 
         y="Proportion of Hourly Measurements"
         )

# Integrate the clouds data
mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)

# Cycle through to find levels of a given type
ckClouds <- function(cloudType) {
    isKey <- which(apply(mtxCloud, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
    as.integer(str_replace(mtxCloud[, min(isKey)], cloudType, "")) * 100
}
lowOVC <- ckClouds("OVC")
lowVV <- ckClouds("VV")
lowBKN <- ckClouds("BKN")
lowSCT <- ckClouds("SCT")
lowFEW <- ckClouds("FEW")

# Integrate the lowest cloud type by level
lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
lowCloud
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows
# Get the lowest cloud level
minCloud <- lowCloud
minCloud[is.na(minCloud)] <- 999999
minCloudLevel <- apply(minCloud, 1, FUN=min)
minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)

noCloudPct <- mean(minCloudLevel == 999999)
noCeilingPct <- mean(minCeilingLevel == 999999)

# Plot the minimum cloud level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
    filter(minCloudLevel != 999999) %>%
    ggplot(aes(x=minCloudLevel)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    geom_text(aes(x=2500, y=0.04, 
                  label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
                  )
              ) + 
    labs(x="Height [ft]", y="Proportion", title="Minimum Cloud Height (when some clouds exist)", 
         subtitle="Lincoln, NE (2016)"
         )

# Plot the minimum ceiling level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
    filter(minCeilingLevel != 999999) %>%
    ggplot(aes(x=minCeilingLevel)) + 
    geom_bar(aes(y=..count../sum(..count..))) + 
    geom_text(aes(x=2500, y=0.04, 
                  label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
                  )
              ) + 
    labs(x="Height [ft]", y="Proportion", title="Minimum Ceiling Height (when a ceiling exists)", 
         subtitle="Lincoln, NE (2016)"
         )

Example #16: Plotting by factor variables

The month of the year is an interesting data point for plotting against.

Example code includes:

# Integrate the cloud data and convert month to a factor
dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
    mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                  numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                  TRUE ~ "Error"
                                  ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                  ), 
           month=factor(month, levels=1:12, labels=month.abb)
           )
dfFull <- tibble::as_tibble(dfFull)
str(dfFull)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  28 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
# Run the boxplot of a factor against a numeric variable
plotFactorNumeric <- function(fctVar, numVar, title=NULL) {
    if (is.null(title)) { title <- paste0("Lincoln, NE (2016) Hourly Weather - ", numVar, " vs. ", fctVar) }
    p <- dfFull %>%
        filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
        ggplot(aes_string(x=fctVar, y=numVar)) + 
        geom_boxplot(fill="lightblue") + 
        labs(title=title)
    print(p)
}

# Run for all of the key variables against wind speed and cloud type
keyVar <- c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF")
for (var in keyVar) { plotFactorNumeric("month", var) }

for (var in keyVar) { plotFactorNumeric("wType", var) }

# Create stacked bars for cloud type by month
dfFull %>%
    filter(!is.na(wType), wType!="Error") %>%
    ggplot(aes(x=month, fill=wType)) + 
    geom_bar(position="fill") + 
    labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")

Example #17: Functional Form - METAR download and initial wind processing

Example 12 can be converted to functional form so that the process can be applied to other reporting stations and time periods.

Example code includes:

# Function to make an initial read of the data, filter to METAR records, and check date-times
readMETAR <- function(fileName="./RInputFiles/metar_klnk_2016.txt", timeZ="54Z",
                      expMin=as.POSIXct("2015-12-31 00:54:00", tz="UTC"), expDays=365
                      ) {

    # Load METAR data
    initRead <- readr::read_csv(fileName, na=c("", "NA", "M"))
    str(initRead, give.attr=FALSE)

    # Filter to only data that ends with times ending in 54Z
    filterRead <- initRead %>%
        filter(str_detect(metar, timeZ))
    dim(filterRead)

    # Check that the dates and times included are as expected
    expDate <- expMin + lubridate::hours(0:(24*expDays - 1))
    
    # Observations expected but not recorded
    cat("\n*** OBSERVATIONS EXPECTED BUT NOT RECORDED ***\n")
    print(as.POSIXct(setdiff(expDate, filterRead$valid), origin="1970-01-01", tz="UTC"))

    # Observations recorded but not expected
    cat("\n*** OBSERVATIONS RECORDED BUT NOT EXPECTED ***\n")
    print(as.POSIXct(setdiff(filterRead$valid, expDate), origin="1970-01-01", tz="UTC"))

    # Confirmation of uniqueness
    cat("\n*** Are the extracted records unique? ***\n")
    print(length(unique(filterRead$valid)) == length(filterRead$valid))
    cat("\n")
    
    # Return the dataset as a tibble
    tibble::as_tibble(filterRead)
}
funcMETAR <- readMETAR(expDays=368)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
funcMETAR
## # A tibble: 8,813 x 29
##    station valid                tmpf  dwpf  relh  drct  sknt p01i   alti  mslp
##    <chr>   <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
##  1 LNK     2015-12-31 00:54:00  27.0  19.9  74.5   300     5 0.00   30.3 1028.
##  2 LNK     2015-12-31 01:54:00  26.1  19.9  77.3     0     0 0.00   30.3 1028.
##  3 LNK     2015-12-31 02:54:00  27.0  19.9  74.5     0     0 0.00   30.3 1028.
##  4 LNK     2015-12-31 03:54:00  27.0  21.0  78     280     3 0.00   30.3 1028.
##  5 LNK     2015-12-31 04:54:00  27.0  19.9  74.5   310     5 0.00   30.3 1029.
##  6 LNK     2015-12-31 05:54:00  21.0  14    73.9    10     9 0.00   30.3 1029.
##  7 LNK     2015-12-31 06:54:00  19.0  12.0  73.7     0     0 0.00   30.3 1029 
##  8 LNK     2015-12-31 07:54:00  18.0  12.0  77.2    10     3 0.00   30.3 1029.
##  9 LNK     2015-12-31 08:54:00  14    10.0  83.9     0     0 0.00   30.3 1030.
## 10 LNK     2015-12-31 09:54:00  16.0  10.9  80.1     0     0 0.00   30.4 1030.
## # ... with 8,803 more rows, and 19 more variables: vsby <dbl>, gust <dbl>,
## #   skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## #   skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## #   ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## #   peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## #   feel <dbl>, metar <chr>
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)
extractWind <- function(met) {

    mtxWind <- met %>%
        pull(metar) %>%
        str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
    cat("\n*** First 6 winds and parsing ***\n")
    print(head(mtxWind))

    cat("\n*** Table of WIND DIRECTION ***\n")
    print(table(mtxWind[, 2], useNA="ifany"))
    cat("\n*** Table of WIND SPEED ***\n")
    print(table(mtxWind[, 3], useNA="ifany"))
    cat("\n*** Table of WIND GUST ***\n")
    print(table(mtxWind[, 4], useNA="ifany"))

    # Verify that winds not captured are in fact missing from the METAR
    cat("\n *** WIND DATA WAS NOT CAPTURED FROM: *** \n")
    print(met[which(is.na(mtxWind[, 2])), "metar"])
    cat("\n")

    met %>%
        mutate(dirW=mtxWind[, 2], 
               spdW=as.numeric(mtxWind[, 3]), 
               gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
               )
}
windMETAR <- extractWind(funcMETAR)
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
windMETAR
## # A tibble: 8,813 x 32
##    station valid                tmpf  dwpf  relh  drct  sknt p01i   alti  mslp
##    <chr>   <dttm>              <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
##  1 LNK     2015-12-31 00:54:00  27.0  19.9  74.5   300     5 0.00   30.3 1028.
##  2 LNK     2015-12-31 01:54:00  26.1  19.9  77.3     0     0 0.00   30.3 1028.
##  3 LNK     2015-12-31 02:54:00  27.0  19.9  74.5     0     0 0.00   30.3 1028.
##  4 LNK     2015-12-31 03:54:00  27.0  21.0  78     280     3 0.00   30.3 1028.
##  5 LNK     2015-12-31 04:54:00  27.0  19.9  74.5   310     5 0.00   30.3 1029.
##  6 LNK     2015-12-31 05:54:00  21.0  14    73.9    10     9 0.00   30.3 1029.
##  7 LNK     2015-12-31 06:54:00  19.0  12.0  73.7     0     0 0.00   30.3 1029 
##  8 LNK     2015-12-31 07:54:00  18.0  12.0  77.2    10     3 0.00   30.3 1029.
##  9 LNK     2015-12-31 08:54:00  14    10.0  83.9     0     0 0.00   30.3 1030.
## 10 LNK     2015-12-31 09:54:00  16.0  10.9  80.1     0     0 0.00   30.4 1030.
## # ... with 8,803 more rows, and 22 more variables: vsby <dbl>, gust <dbl>,
## #   skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## #   skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## #   ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## #   peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## #   feel <dbl>, metar <chr>, dirW <chr>, spdW <dbl>, gustW <dbl>
# Generate basic wind plots
basicWindPlots <- function(met, desc="Lincoln, NE", gran="KLNK METAR (2016)") {

    # Plot for the wind direction
    wDir <- met %>%
        ggplot(aes(x=dirW)) + 
        geom_bar() + 
        labs(title=paste0(desc, " Wind Direction"), subtitle=gran, 
             y="# Hourly Observations", x="Wind Direction"
             )
    print(wDir)

    # Plot for the minimum, average, and maximum wind speed by wind direction
    # Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
    wSpeedByDir <- met %>%
        filter(!is.na(dirW)) %>%
        group_by(dirW) %>%
        summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
        ggplot(aes(x=dirW)) + 
        geom_point(aes(y=meanWind), color="red", size=2) + 
        geom_errorbar(aes(ymin=minWind, ymax=maxWind)) + 
        labs(title=paste0(desc, " Wind Speed (Max, Mean, Min) By Wind Direction"), subtitle=gran, 
             y="Wind Speed [KT]", x="Wind Direction"
             )
    print(wSpeedByDir)

    # Plot for the wind speed
    pctZero <- sum(met$spdW==0, na.rm=TRUE) / length(met$spdW)
    wSpeed <- met %>%
        ggplot(aes(x=spdW)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        labs(title=paste0(round(100*pctZero), "% of wind speeds in ", desc, " measure 0 Knots"), 
             subtitle=gran, 
             y="% Hourly Observations", x="Wind Speed {KT]"
             )
    print(wSpeed)

    wPolarDirSpeed <- met %>% 
        filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
        mutate(dirW=as.numeric(dirW)) %>%
        group_by(dirW, spdW) %>%
        summarize(n=n()) %>%
        ggplot(aes(x=spdW, y=dirW)) + 
        geom_point(alpha=0.1, aes(size=n)) + 
        coord_polar(theta="y") + 
        labs(title=paste0(desc, " Direction vs. Wind Speed"), subtitle=gran, x="Wind Speed [KT]") + 
        scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) + 
        scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) + 
        geom_point(aes(x=0, y=0), color="red", size=2)
    print(wPolarDirSpeed)
}
basicWindPlots(windMETAR)

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 4 rows containing missing values (geom_point).

Example #18: Functional Form for Extracting Key Elements from METAR

METAR parsing can also be converted to a functional form. This will need to be modified to be more general, since the codes used for a few things like clouds can vary from station to station.

Example code includes:

# Code for the initial METAR parsing
initialParseMETAR <- function(met, val, labs) {
    
    # Pull the METAR data
    metAll <- met %>%
        pull(metar)
    
    # Find the number of matching elements
    cat("\n*** Tentative Summary of Element Parsing *** \n")
    str_detect(metAll, pattern=val) %>% 
        table() %>%
        print()

    # The strings that do not match have errors in the raw data (typically, missing wind speed)
    cat("\n*** Data Not Matched *** \n")
    print(metAll[!str_detect(metAll, pattern=val)])

    # A matrix of string matches can be obtained
    mtxParse <- str_match(metAll, pattern=val)
    cat("\n*** Parsing matrix summary *** \n")
    print(dim(mtxParse))
    print(head(mtxParse))

    # Create a data frame
    dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE) %>%
        mutate(dtime=met$valid, origMETAR=met$metar)
    names(dfParse) <- c(labs, "dtime", "origMETAR")
    dfParse <- tibble::as_tibble(dfParse)
    cat("\n*** Summary of the parsed data *** \n")
    glimpse(dfParse)
    
    dfParse
}


# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"

# Create the names for the search string to parse in to
labsMet <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
             "TempC", "DewC", "Altimeter", "SLP", "FahrC"
             )

# Run the METAR parsing on the raw data
initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    23  8790 
## 
## *** Data Not Matched *** 
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"                
## 
## *** Parsing matrix summary *** 
## [1] 8813   11
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
## 
## *** Summary of the parsed data *** 
## Observations: 8,813
## Variables: 13
## $ METAR      <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir    <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed  <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC       <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter  <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP        <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC      <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
initMETAR
## # A tibble: 8,813 x 13
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC  Altimeter SLP  
##    <chr> <chr>   <chr>     <chr>    <chr> <chr>      <chr> <chr> <chr>     <chr>
##  1 54Z ~ 300     05        <NA>     " "   10SM       M03   M07   A3029     SLP2~
##  2 54Z ~ 000     00        <NA>     " "   10SM       M03   M07   A3030     SLP2~
##  3 54Z ~ 000     00        <NA>     " "   10SM       M03   M07   A3030     SLP2~
##  4 54Z ~ 280     03        <NA>     " "   10SM       M03   M06   A3031     SLP2~
##  5 54Z ~ 310     05        <NA>     " "   10SM       M03   M07   A3033     SLP2~
##  6 54Z ~ 010     09        <NA>     " "   10SM       M06   M10   A3033     SLP2~
##  7 54Z ~ 000     00        <NA>     " "   10SM       M07   M11   A3034     SLP2~
##  8 54Z ~ 010     03        <NA>     " "   10SM       M08   M11   A3034     SLP2~
##  9 54Z ~ 000     00        <NA>     " "   10SM       M10   M12   A3034     SLP2~
## 10 54Z ~ 000     00        <NA>     " "   10SM       M09   M12   A3035     SLP2~
## # ... with 8,803 more rows, and 3 more variables: FahrC <chr>, dtime <dttm>,
## #   origMETAR <chr>
# Helper function for generating plots by key variables
plotcountsByMetric <- function(df, mets) {
    
    # Plot of counts by key metric
    for (x in mets) {
        p <- df %>%
            group_by_at(vars(all_of(x))) %>%
            summarize(n=n()) %>%
            ggplot(aes_string(x=x, y="n")) + 
            geom_col() + 
            labs(title=x, y="Count")
        print(p)
    }
}


# Code for the conversion of METAR to meaningful numeric
# Should make this much more general later
convertMETAR <- function(met, metrics, seed=2003211416) {
    
    # Convert to numeric where appropriate
    dfParse <- met %>%
        mutate(WindSpeed = as.integer(WindSpeed), 
               WindGust = as.numeric(WindGust), 
               Visibility = as.numeric(str_replace(Visibility, "SM", "")),
               TempC = as.integer(str_replace(TempC, "M", "-")), 
               DewC = as.integer(str_replace(DewC, "M", "-")), 
               Altimeter = as.integer(str_replace(Altimeter, "A", "")), 
               SLP = as.integer(str_replace(SLP, "SLP", "")), 
               TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10, 
               DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
               )

    # Investigate the data
    cat("\n *** Parsed data structure, head, tail, and random sample *** \n")
    str(dfParse)
    print(head(dfParse))
    print(tail(dfParse))
    set.seed(seed)
    dfParse %>% 
        sample_n(20) %>%
        print()

    # Check for NA values
    cat("\n *** Number of NA values *** \n")
    print(colSums(is.na(dfParse)))

    # Plot of counts by key metric
    plotcountsByMetric(dfParse, mets=metrics)
    
    # Return the parsed dataset
    dfParse
}

keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
               "DewC", "Altimeter", "SLP", "TempF", "DewF"
               )
convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
## Warning: NAs introduced by coercion
## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         23         23         23         23          0          0         23 
##       DewF 
##         23

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit

# Address the visibility issues
getVisibility <- function(curMet, origMet) {
    
    # Get the original METAR data
    metAll <- origMet %>%
        pull(metar)
    
    # Correct for visibility
    # Areas that have \\d \\d/\\dSM
    sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
    sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))

    valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
    valSM1 <- str_replace(valSM1, "SM", "")
    valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))

    valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
    valSM2 <- as.integer(str_sub(valSM2, 2, 2))

    curMet[sm1, "Visibility"] <- valSM1
    curMet[sm2, "Visibility"] <- curMet[sm2, "Visibility"] + valSM2

    curMet %>% 
        count(Visibility) %>%
        print()
    
    curMet
}
parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
# Correct for wind gusts
getWindGusts <- function(curMet, origMet) {

    metAll <- origMet %>%
        pull(metar)
    
    gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
    valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
    valGust <- as.integer(str_sub(valGust, 7, 8))

    curMet[gustCheck, "WindGust"] <- valGust

    curMet %>% 
        count(WindGust) %>% 
        as.data.frame %>%
        print()
    
    curMet
}
parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
# Correct for SLP
fixSLP <- function(curMet) {

    dfParse <- curMet %>%
        mutate(modSLP=ifelse(curMet$SLP < 500, 1000 + curMet$SLP/10, 900 + curMet$SLP/10))

    p <- dfParse %>%
        group_by(SLP, modSLP) %>%
        summarize(n=n()) %>%
        ggplot(aes(x=SLP, y=modSLP, size=n)) + 
        geom_point(alpha=0.3)
    print(p)
    
    dfParse
}
parseMETAR <- fixSLP(parseMETAR)
## Warning: Removed 1 rows containing missing values (geom_point).

# Check updated plots
plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

Example #19: Functional Form For Relationships Between METAR Variables

The relationships between METAR variables can also be converted to functional form.

Example code includes:

# Function to calculate, display, and plot variable correlations
corMETAR <- function(met, numVars, subT="") {

    # Keep only complete cases and report on data kept
    dfUse <- met %>%
        select_at(vars(all_of(numVars))) %>%
        filter(complete.cases(.))
    
    nU <- nrow(dfUse)
    nM <- nrow(met)
    myPct <- round(100*nU/nM, 1)
    cat("\n *** Correlations use ", nU, " complete cases (", myPct, "% of ", nM, " total) ***\n", sep="")
    
    # Create the correlation matrix
    mtxCorr <- dfUse %>%
        cor()

    # Print the correlations
    mtxCorr %>%
        round(2) %>%
        print()

    # Display a heat map
    corrplot::corrplot(mtxCorr, method="color", title=paste0("Hourly Weather Correlations\n", subT))
}

# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")

# Run the correlations function
corMETAR(parseMETAR, numVars=coreNum)
## 
##  *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00

# Create a function for plotting two variables against each other
plotNumCor <- function(met, var1, var2, title=NULL, subT="") {
    if (is.null(title)) 
        { title <- paste0("Hourly Correlations of ", var1, " and ", var2) }
    p <- met %>%
        group_by_at(vars(all_of(c(var1, var2)))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=var1, y=var2)) + 
        geom_point(alpha=0.5, aes_string(size="n")) + 
        geom_smooth(method="lm", aes_string(weight="n")) + 
        labs(x=var1, y=var2, title=title, subtitle=subT)
    print(p)
}

var1List <- c("TempC", "DewC", "Altimeter", "TempF", "TempF",     "TempF",  "Altimeter")
var2List <- c("TempF", "DewF", "modSLP",    "DewF",  "Altimeter", "modSLP", "WindSpeed")

for (n in 1:length(var1List)) {
    plotNumCor(parseMETAR, var1List[n], var2List[n])
}
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

# Function for linear regressions on METAR data
lmMETAR <- function(met, y, x, yName, subT="Lincoln, NE (2016) Hourly METAR") {
    
    # Convert to formula
    myChar <- paste0(y, " ~ ", x)
    cat("\n *** Regression call is:", myChar, "***\n")
    
    # Run regression
    regr <- lm(formula(myChar), data=met)
    
    # Summarize regression
    print(summary(regr))
    
    # Predict the new values
    pred <- predict(regr, newdata=met)
    
    # Plot the predictions
    p <- met %>%
        select_at(vars(all_of(y))) %>%
        mutate(pred=pred) %>%
        group_by_at(vars(all_of(c(y, "pred")))) %>%
        summarize(n=n()) %>%
        ggplot(aes_string(x=y, y="pred")) + 
        geom_point(aes(size=n), alpha=0.25) + 
        geom_smooth(aes(weight=n), method="lm") + 
        labs(title=paste0("Predicted vs. Actual ", yName, " - ", x, " as Predictor"), 
             subtitle=subT, x=paste0("Actual ", yName), y=paste0("Predicted ", yName)
             )
    print(p)
}

lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure")
## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure")
## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

Example #20: Functional Form for Extracting Cloud Data from METAR

Cloud data can also be extracted using the functional form.

Example code includes:

extractClouds <- function(met, metVar, subT="Lincoln, NE (2016) Hourly METAR") {

    metAll <- met %>%
        pull(metVar)
    
    # Extract the CLR records
    mtxCLR <- str_extract_all(metAll, pattern=" CLR ", simplify=TRUE)
    if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
    isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)

    # Extract the VV records
    mtxVV <- str_extract_all(metAll, pattern="VV(\\d{3})", simplify=TRUE)
    if (dim(mtxVV)[[2]] > 1) { stop("Extracted 2+ VV from some METAR; investigate") }
    if ((dim(mtxVV))[[2]] == 0) {
        cat("\nNo Records with a cloud type of vertical visibility (VV)\n")
        isVV <- rep(0, times=length(isCLR))
        htVV <- rep(NA, times=length(isCLR))
    } else {
        isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
        htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)
    }

    # Extract the FEW records
    mtxFEW <- str_extract_all(metAll, pattern="FEW(\\d{3})", simplify=TRUE)
    numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the SCT records
    mtxSCT <- str_extract_all(metAll, pattern="SCT(\\d{3})", simplify=TRUE)
    numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the BKN records
    mtxBKN <- str_extract_all(metAll, pattern="BKN(\\d{3})", simplify=TRUE)
    numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )

    # Extract the OVC records
    mtxOVC <- str_extract_all(metAll, pattern="OVC(\\d{3})", simplify=TRUE)
    numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )

    # Summarize as a data frame
    tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW, 
                                numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
                                )

    # Get the counts
    cat("\n*** Counts by number of layers of each cloud type ***\n")
    tblClouds %>% 
        count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
        as.data.frame() %>%
        print()

    # Investigate the problem data
    cat("\n*** METAR records where no clouds were extracted ***\n")
    metAll[rowSums(tblClouds, na.rm=TRUE)==0] %>%
        print()
    
    # Plot the counts of most obscuration
    p <- tblClouds %>%
        filter(rowSums(., na.rm=TRUE) > 0) %>%
        mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                      numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                      TRUE ~ "Error"
                                      ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                            )
               ) %>%
        ggplot(aes(x=wType, y=..count../sum(..count..))) + 
        geom_bar() + 
        labs(title="Highest Obscuration by Cloud", subtitle=subT, 
             x="Cloud Type", y="Proportion of Hourly Measurements"
             )
    print(p)
    
    # Integrate the clouds data
    mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)
    cat("\n*** Dimensions for the cloud matrix ***\n")
    print(dim(mtxCloud))
    
    list(tblClouds=tblClouds, mtxCloud=mtxCloud)
}

# Run the initial cloud extraction
initClouds <- extractClouds(parseMETAR, metVar="origMETAR")
## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
## 
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8813   11
str(initClouds)
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8813 obs. of  7 variables:
##   ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##  $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
# Cycle through to find levels of a given type
ckClouds <- function(cloudType, mtx) {
    isKey <- which(apply(mtx, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
    as.integer(str_replace(mtx[, min(isKey)], cloudType, "")) * 100
}


# Function to create the lowest cloud levels
findLowestClouds <- function(mtxCloud, subT="Lincoln, NE (2016) Hourly METAR") {

    # Find the lowest clouds by cloud type
    lowOVC <- ckClouds("OVC", mtx=mtxCloud)
    lowVV <- ckClouds("VV", mtx=mtxCloud)
    lowBKN <- ckClouds("BKN", mtx=mtxCloud)
    lowSCT <- ckClouds("SCT", mtx=mtxCloud)
    lowFEW <- ckClouds("FEW", mtx=mtxCloud)

    # Integrate the lowest cloud type by level
    lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
    cat("\n*** Lowest clouds by type tibble ***\n")
    print(lowCloud)

    # Get the lowest cloud level
    minCloud <- lowCloud
    minCloud[is.na(minCloud)] <- 999999
    minCloudLevel <- apply(minCloud, 1, FUN=min)
    minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)

    noCloudPct <- mean(minCloudLevel == 999999)
    noCeilingPct <- mean(minCeilingLevel == 999999)

    # Plot the minimum cloud level (where it exists)
    p <- data.frame(minCloudLevel, minCeilingLevel) %>%
        filter(minCloudLevel != 999999) %>%
        ggplot(aes(x=minCloudLevel)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        geom_text(aes(x=2500, y=0.04, 
                      label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
                      )
                  ) + 
        labs(x="Height [ft]", y="Proportion", 
             title="Minimum Cloud Height (when some clouds exist)", subtitle=subT
             )
    print(p)

    # Plot the minimum ceiling level (where it exists)
    p <- data.frame(minCloudLevel, minCeilingLevel) %>%
        filter(minCeilingLevel != 999999) %>%
        ggplot(aes(x=minCeilingLevel)) + 
        geom_bar(aes(y=..count../sum(..count..))) + 
        geom_text(aes(x=2500, y=0.04, 
                      label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
                      )
                  ) + 
        labs(x="Height [ft]", y="Proportion", 
             title="Minimum Ceiling Height (when a ceiling exists)", subtitle=subT
             )
    print(p)
    
    list(lowCloud=lowCloud, minCeilingLevel=minCeilingLevel, minCloudLevel=minCloudLevel)
}

processedClouds <-findLowestClouds(initClouds$mtxCloud)
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows

str(processedClouds)
## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  5 variables:
##   ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##  $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##  $ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...

Example #21: Functional Form for Plotting by factor variables

The month of the year is an interesting data point for plotting against.

Example code includes:

# Function to bind the existing parsed METAR data with the cloud data
bindMETAR <- function(dfParse, tblClouds, lowCloud) {

    # Integrate the cloud data and convert month to a factor
    dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
        mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC", 
                                      numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW", 
                                      TRUE ~ "Error"
                                      ), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
                            ), 
               month=factor(lubridate::month(dtime), levels=1:12, labels=month.abb)
               )
    
    dfFull <- tibble::as_tibble(dfFull)
    str(dfFull)
    
    dfFull
}

fullMETAR <- bindMETAR(dfParse=parseMETAR, 
                       tblClouds=initClouds$tblClouds, 
                       lowCloud=processedClouds$lowCloud
                       )
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  30 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
# Updated function for plotting numeric by factor
plotFactorNumeric <- function(met, fctVar, numVar, title=NULL, subT) {
    if (is.null(title)) { title <- paste0("Hourly Weather - ", numVar, " vs. ", fctVar) }
    p <- met %>%
        filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
        ggplot(aes_string(x=fctVar, y=numVar)) + 
        geom_boxplot(fill="lightblue") + 
        labs(title=title, subtitle=subT)
    print(p)
}

# Function for creating cloud plots
makeFactorPlots <- function(met, 
                            fctVar=c("month", "wType"), 
                            keyVar=c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF"), 
                            desc="Lincoln, NE (2016) Hourly METAR"
                            ) {

    # Run for all of the key variables against wind speed and cloud type
    for (varF in fctVar) {
        for (varK in keyVar) { 
            plotFactorNumeric(met, fctVar=varF, numVar=varK, subT=desc) 
        }
    }

    # Create stacked bars for cloud type by month
    # dfFull %>%
    #     filter(!is.na(wType), wType!="Error") %>%
    #     ggplot(aes(x=month, fill=wType)) + 
    #     geom_bar(position="fill") + 
    #     labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")
}

makeFactorPlots(fullMETAR)

Example #22: Combining Functional Forms for METAR Processing

The functions can be combined in to a single routine for reading, parsing, and running EDA on METAR data..

Example code includes:

# Function to run the full process
runAllMETAR <- function(fname, timeZ, expMin, expDays, locMET, shortMET, longMET, valMet, 
                        labsMet=c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility", 
                                  "TempC", "DewC", "Altimeter", "SLP", "FahrC"
                                  ), 
                        keyMetric=c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC", 
                                    "DewC", "Altimeter", "SLP", "TempF", "DewF"
                                    ), 
                        coreNum=c("TempC", "TempF", "DewC", "DewF", 
                                  "Altimeter", "modSLP", "WindSpeed", "Visibility"
                                  ), 
                        var1List=c("TempC", "DewC", "Altimeter", "TempF", "TempF", "TempF", "Altimeter"), 
                        var2List=c("TempF", "DewF", "modSLP", "DewF", "Altimeter", "modSLP", "WindSpeed")
                        ) {
    
    # Read in the METAR data
    funcMETAR <- readMETAR(fileName=fname, timeZ=timeZ, expMin=expMin, expDays=expDays)
    # funcMETAR

    # Extract wind data from METAR
    windMETAR <- extractWind(funcMETAR)
    # windMETAR

    # Run basic wind plots
    basicWindPlots(windMETAR, desc=locMET, gran=shortMET)

    # Run the METAR parsing on the raw data
    initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
    # initMETAR

    # Parse and convert the METAR data
    convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
    # convMETAR

    # Fix problems with visibility, wind gusts, and SLP
    parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
    parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
    parseMETAR <- fixSLP(parseMETAR)

    # Check updated plots
    plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))

    # Run the correlations function
    corMETAR(parseMETAR, numVars=coreNum, subT=longMET)

    # Plot correlations
    for (n in 1:length(var1List)) {
        plotNumCor(parseMETAR, var1List[n], var2List[n], subT=longMET)
    }

    # Run lm models for SLP vs Altimeter and (optionally) Temperature
    lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure", subT=longMET)
    lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure", subT=longMET)

    # Run the initial cloud extraction
    initClouds <- extractClouds(parseMETAR, metVar="origMETAR", subT=longMET)
    str(initClouds)

    # Find the lowest cloud levels and lowest ceilings
    processedClouds <-findLowestClouds(initClouds$mtxCloud, subT=longMET)
    str(processedClouds)

    # Bind the processed METAR and the cloud data
    fullMETAR <- bindMETAR(dfParse=parseMETAR, 
                           tblClouds=initClouds$tblClouds, 
                           lowCloud=processedClouds$lowCloud
                           )

    # Create box plots for key weather elements against month and cloud type
    makeFactorPlots(fullMETAR, desc=longMET)
    
    # Return all of the elements
    list(fullMETAR=fullMETAR, funcMETAR=funcMETAR, windMETAR=windMETAR, 
         initMETAR=initMETAR, convMETAR=convMETAR, parseMETAR=parseMETAR, 
         initClouds=initClouds, processedClouds=processedClouds
         )
}

Followed by caching the run of the function for Lincoln, NE:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_klnk_2016.txt"  # file name for raw METAR data
timeZ <- "54Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:54:00", tz="UTC")  # Expected first time read
expDays <- 368  # Expected total days read
locMET <- "Lincoln, NE"  # Description of city or location
shortMET <- "KLNK METAR (2016)"  # Station code and timing
longMET <- "Lincoln, NE Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Lincoln, NE
klnk2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of  29 variables:
##  $ station          : chr  "LNK" "LNK" "LNK" "LNK" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ tmpf             : num  27 26.1 27 27 27 ...
##  $ dwpf             : num  19.9 19.9 19.9 21 19.9 ...
##  $ relh             : num  74.5 77.3 74.5 78 74.5 ...
##  $ drct             : num  300 0 0 280 310 10 0 10 20 0 ...
##  $ sknt             : num  5 0 0 3 5 9 0 3 3 0 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.3 30.3 30.3 30.3 30.3 ...
##  $ mslp             : num  1028 1028 1028 1028 1029 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
##  $ skyl2            : num  NA NA NA NA 2700 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.4 26.1 27 22.9 20.4 ...
##  $ metar            : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
##  [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
##  [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
##  [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
##  [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "30005KT" "300" "05" NA  
## [2,] "00000KT" "000" "00" NA  
## [3,] "00000KT" "000" "00" NA  
## [4,] "28003KT" "280" "03" NA  
## [5,] "31005KT" "310" "05" NA  
## [6,] "01009KT" "010" "09" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  875  269  199  146  135  121  108   95   88   65  102  158  169  245  241  339 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  463  565  517  413  284  179  142   96   73   80  105   89  121  141  147  225 
##  320  330  340  350  360  VRB <NA> 
##  234  303  352  413  383  114   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  875  615  704  664  696  651  636  599  536  451  439  371  315  276  235  173 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   34 
##  156  108   69   62   45   33   23   13   14    9   10    6    6    2    1    1 
## <NA> 
##   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    6   11   11   16   30   59   69   87   83  107  101   83   62   61   61   37 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45 <NA> 
##   28   24   18   13   12   14    6    6   10   11    2    3    1    2    2 7777 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028                 
##  2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083                  
##  3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
##  4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
##  5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $                 
##  6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007             
##  7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
##  8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $                    
##  9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106                   
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200                
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $                 
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222       
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001 
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010 
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183                   
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100                   
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $           
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067                   
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 4 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    23  8790 
## 
## *** Data Not Matched *** 
##  [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"                   
##  [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"                    
##  [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"  
##  [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"                
##  [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $" 
##  [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"                   
##  [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"               
##  [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
##  [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                      
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"                
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"                     
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"                  
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"              
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"                   
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"         
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"   
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"   
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"                     
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"                     
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"             
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"             
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"                     
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"                
## 
## *** Parsing matrix summary *** 
## [1] 8813   11
##      [,1]                                                                   
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"       
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"       
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"       
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"       
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"  
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "05" NA   " "  "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA   " "  "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA   " "  "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA   " "  "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA   " "  "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
## 
## *** Summary of the parsed data *** 
## Observations: 8,813
## Variables: 13
## $ METAR      <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir    <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed  <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC       <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter  <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP        <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC      <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  15 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 300             5       NA " "           10    -3    -7      3029   275
## 2 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 3 54Z ~ 000             0       NA " "           10    -3    -7      3030   277
## 4 54Z ~ 280             3       NA " "           10    -3    -6      3031   281
## 5 54Z ~ 310             5       NA " "           10    -3    -7      3033   286
## 6 54Z ~ 010             9       NA " "           10    -6   -10      3033   289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 54Z ~ 160             6       NA " "           10     2    -4      2993   147
## 2 54Z ~ VRB             4       NA " "           10     4    -4      2990   139
## 3 54Z ~ 130             7       NA " "           10     4    -7      2989   134
## 4 54Z ~ 110             7       NA " "           10     4    -7      2988   130
## 5 54Z ~ 100            10       NA " "           10     3    -5      2986   125
## 6 54Z ~ 100            10       NA " "           10     3    -6      2986   123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 54Z ~ 360             7       NA " "           10    21    15      2995   130
##  2 54Z ~ 210            13       NA " "           10    31     4      2988   108
##  3 54Z ~ 340            11       NA " "           10    -7   -13      3025   259
##  4 54Z ~ 240             9       NA " "           10    13     8      2961    18
##  5 54Z ~ 310             6       NA " "           10    -7   -19      3044   329
##  6 54Z ~ 040             7       NA " "           10    26    14      3010   179
##  7 54Z ~ 170            17       NA " "           10    31    22      2983    90
##  8 54Z ~ 160             7       NA " "           10    -1    -4      3002   182
##  9 54Z ~ 310             9       NA " "           10    13     9      3021   226
## 10 54Z ~ 110            10       NA " "           10    16    16      2990   116
## 11 54Z ~ 140             5       NA " "           10    19    18      3013   199
## 12 54Z ~ 110             6       NA " "           10    20     4      3012   197
## 13 54Z ~ 220             4       NA " "           10     3    -4      3042   311
## 14 54Z ~ 350             3       NA " "            8     9     8      3011   194
## 15 54Z ~ 080             8       NA " "           10   -12   -18      3046   340
## 16 54Z ~ 320             7       NA " "           10    16    11      2998   144
## 17 54Z ~ 330             6       NA " "           10     9     5      3016   214
## 18 54Z ~ 340            13       NA " "            3     0    -1      2979    99
## 19 54Z ~ 190             5       NA " "           10     3    -4      3024   251
## 20 54Z ~ 320            18       NA " "           10    24    12      2999   147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         23         23         23       8813         23         23         23 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         23         23         23         23          0          0         23 
##       DewF 
##         23

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 18 x 2
##    Visibility     n
##         <dbl> <int>
##  1       0.25    20
##  2       0.5     16
##  3       0.75    15
##  4       1       19
##  5       1.25    13
##  6       1.5     17
##  7       1.75    21
##  8       2       50
##  9       2.5     38
## 10       3       70
## 11       4      108
## 12       5      108
## 13       6      146
## 14       7      189
## 15       8      221
## 16       9      290
## 17      10     7449
## 18      NA       23
##    WindGust    n
## 1        14    4
## 2        15   11
## 3        16   11
## 4        17   16
## 5        18   29
## 6        19   59
## 7        20   69
## 8        21   87
## 9        22   83
## 10       23  107
## 11       24  101
## 12       25   83
## 13       26   62
## 14       27   61
## 15       28   61
## 16       29   37
## 17       30   28
## 18       31   24
## 19       32   18
## 20       33   13
## 21       34   12
## 22       35   14
## 23       36    6
## 24       37    6
## 25       38   10
## 26       39   11
## 27       40    2
## 28       41    3
## 29       42    1
## 30       43    2
## 31       45    2
## 32       NA 7780
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## TempF       1.00  1.00  0.91  0.91     -0.38  -0.48      0.13       0.19
## DewC        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## DewF        0.91  0.91  1.00  1.00     -0.38  -0.48     -0.01       0.07
## Altimeter  -0.38 -0.38 -0.38 -0.38      1.00   0.99     -0.26       0.07
## modSLP     -0.48 -0.48 -0.48 -0.48      0.99   1.00     -0.25       0.04
## WindSpeed   0.13  0.13 -0.01 -0.01     -0.26  -0.25      1.00      -0.01
## Visibility  0.19  0.19  0.07  0.07      0.07   0.04     -0.01       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8890 -0.8172 -0.1578  0.7610  2.7890 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.233e+01  1.406e+00  -44.34   <2e-16 ***
## Altimeter    3.594e-01  4.683e-04  767.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9691 on 8788 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.9853, Adjusted R-squared:  0.9853 
## F-statistic: 5.889e+05 on 1 and 8788 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24130 -0.26737  0.00686  0.25214  1.23326 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.002e+01  5.615e-01  -17.84   <2e-16 ***
## Altimeter    3.428e-01  1.857e-04 1845.63   <2e-16 ***
## TempF       -4.559e-02  1.921e-04 -237.31   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3561 on 8787 degrees of freedom
##   (23 observations deleted due to missingness)
## Multiple R-squared:  0.998,  Adjusted R-squared:  0.998 
## F-statistic: 2.209e+06 on 2 and 8787 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    6
## 2      0    0      0      0      0      1 1389
## 3      0    0      0      0      1      0  307
## 4      0    0      0      0      1      1  250
## 5      0    0      0      0      2      0   50
## 6      0    0      0      0      2      1   45
## 7      0    0      0      0      3      0    8
## 8      0    0      0      1      0      0  230
## 9      0    0      0      1      0      1   73
## 10     0    0      0      1      1      0   52
## 11     0    0      0      1      1      1   42
## 12     0    0      0      1      2      0   17
## 13     0    0      0      2      0      0   16
## 14     0    0      0      2      0      1    9
## 15     0    0      0      2      1      0    6
## 16     0    0      1      0      0      0  380
## 17     0    0      1      0      0      1   90
## 18     0    0      1      0      1      0   46
## 19     0    0      1      0      1      1   62
## 20     0    0      1      0      2      0    9
## 21     0    0      1      1      0      0   39
## 22     0    0      1      1      0      1   24
## 23     0    0      1      1      1      0   28
## 24     0    0      1      2      0      0   10
## 25     0    0      2      0      0      0   24
## 26     0    0      2      0      0      1    6
## 27     0    0      2      0      1      0    5
## 28     0    0      2      1      0      0    3
## 29     0    0      3      0      0      0    2
## 30     0    1      0      0      0      0   33
## 31     1    0      0      0      0      0 5552
## 
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"                           
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"                                  
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"                          
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"                           
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"                   
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8813   11
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8813 obs. of  7 variables:
##   ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##  $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   2800     NA     NA     NA
##  2    NA   2700     NA     NA     NA
##  3    NA   2600     NA     NA     NA
##  4    NA   2700     NA     NA     NA
##  5    NA   2700     NA   2100     NA
##  6    NA   2700     NA     NA     NA
##  7    NA   2700     NA     NA     NA
##  8    NA   2700     NA     NA     NA
##  9    NA     NA     NA     NA   2600
## 10    NA     NA     NA     NA     NA
## # ... with 8,803 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  5 variables:
##   ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##  $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##  $ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  30 variables:
##  $ METAR     : chr  "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ WindDir   : chr  "300" "000" "000" "280" ...
##  $ WindSpeed : int  5 0 0 3 5 9 0 3 0 0 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##  $ DewC      : int  -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##  $ Altimeter : int  3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##  $ SLP       : int  275 277 277 281 286 289 290 291 295 295 ...
##  $ FahrC     : chr  "T10281067" "T10331067" "T10281067" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ TempF     : num  27 26.1 27 27 27 ...
##  $ DewF      : num  19.9 19.9 19.9 21 19.9 ...
##  $ modSLP    : num  1028 1028 1028 1028 1029 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ numSCT    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA 2100 NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA 2600 NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(klnk2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  30 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ modSLP    : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ isCLR     : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV      : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   ..$ numSCT    : int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##   ..$ lowVV     : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   ..$ lowBKN    : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  29 variables:
##   ..$ station          : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
##   ..$ valid            : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ tmpf             : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ dwpf             : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ relh             : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
##   ..$ drct             : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
##   ..$ sknt             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ p01i             : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
##   ..$ mslp             : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ vsby             : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
##   ..$ skyl2            : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8813], format: NA NA ...
##   ..$ feel             : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
##   ..$ metar            : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_logical(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_logical(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_character(),
##   .. ..   ice_accretion_3hr = col_character(),
##   .. ..   ice_accretion_6hr = col_character(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  32 variables:
##   ..$ station          : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
##   ..$ valid            : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ tmpf             : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ dwpf             : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ relh             : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
##   ..$ drct             : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
##   ..$ sknt             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ p01i             : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
##   ..$ mslp             : num [1:8813] 1028 1028 1028 1028 1029 ...
##   ..$ vsby             : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8813] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
##   ..$ skyl2            : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8813] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8813], format: NA NA ...
##   ..$ feel             : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
##   ..$ metar            : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ dirW             : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ spdW             : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ gustW            : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  13 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : chr [1:8813] "05" "00" "00" "03" ...
##   ..$ WindGust  : chr [1:8813] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: chr [1:8813] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8813] "M03" "M03" "M03" "M03" ...
##   ..$ DewC      : chr [1:8813] "M07" "M07" "M07" "M06" ...
##   ..$ Altimeter : chr [1:8813] "A3029" "A3030" "A3030" "A3031" ...
##   ..$ SLP       : chr [1:8813] "SLP275" "SLP277" "SLP277" "SLP281" ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  15 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of  16 variables:
##   ..$ METAR     : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ WindDir   : chr [1:8813] "300" "000" "000" "280" ...
##   ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
##   ..$ WindGust  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8813] " " " " " " " " ...
##   ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
##   ..$ DewC      : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
##   ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
##   ..$ SLP       : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
##   ..$ FahrC     : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##   ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##   ..$ TempF     : num [1:8813] 27 26.1 27 27 27 ...
##   ..$ DewF      : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
##   ..$ modSLP    : num [1:8813] 1028 1028 1028 1028 1029 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
##   .. ..$ isVV  : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
##   .. ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
##   ..$ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8813 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
##   .. ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
##   ..$ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
##   ..$ minCloudLevel  : num [1:8813] 2800 2700 2600 2700 2100 ...

Example #23: Running for a Different Station

The functions can be run for a different station.

Example code includes:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_kord_2016.txt"  # file name for raw METAR data
timeZ <- "51Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:51:00", tz="UTC")  # Expected first time read
expDays <- 368  # Expected total days read
locMET <- "Chicago, IL"  # Description of city or location
shortMET <- "KORD METAR (2016)"  # Station code and timing
longMET <- "Chicago, IL O'Hare Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Lincoln, NE
kord2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_character(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_logical(),
##   ice_accretion_3hr = col_logical(),
##   ice_accretion_6hr = col_logical(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 22 parsing failures.
##  row               col           expected actual                                file
## 1678 ice_accretion_1hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1680 ice_accretion_6hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1902 ice_accretion_1hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1903 ice_accretion_1hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## 1903 ice_accretion_6hr 1/0/T/F/TRUE/FALSE   0.01 './RInputFiles/metar_kord_2016.txt'
## .... ................. .................. ...... ...................................
## See problems(...) for more details.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10809 obs. of  29 variables:
##  $ station          : chr  "ORD" "ORD" "ORD" "ORD" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ tmpf             : num  27 27 27 27 27 ...
##  $ dwpf             : num  21.9 21.9 21 21 21 ...
##  $ relh             : num  81 81 78 78 78 ...
##  $ drct             : num  260 260 250 240 250 250 270 260 250 250 ...
##  $ sknt             : num  8 8 8 9 6 7 10 11 8 10 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.2 30.2 30.2 30.2 30.2 ...
##  $ mslp             : num  1024 1024 1024 1024 1024 ...
##  $ vsby             : num  8 9 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : chr  NA NA NA NA ...
##  $ skyl1            : num  1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##  $ skyl2            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_3hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_6hr: logi  NA NA NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  17.9 17.9 17.9 17.2 19.5 ...
##  $ metar            : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-04-09 03:51:00 UTC" "2016-06-11 20:51:00 UTC"
##  [3] "2016-06-17 23:51:00 UTC" "2016-06-18 00:51:00 UTC"
##  [5] "2016-06-30 15:51:00 UTC" "2016-07-13 14:51:00 UTC"
##  [7] "2016-07-13 15:51:00 UTC" "2016-07-13 16:51:00 UTC"
##  [9] "2016-07-13 17:51:00 UTC" "2016-07-30 01:51:00 UTC"
## [11] "2016-08-05 07:51:00 UTC" "2016-08-07 14:51:00 UTC"
## [13] "2016-11-21 00:51:00 UTC" "2016-11-23 09:51:00 UTC"
## [15] "2016-11-27 18:51:00 UTC" "2016-12-01 11:51:00 UTC"
## [17] "2016-12-05 18:51:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "26008KT" "260" "08" NA  
## [2,] "26008KT" "260" "08" NA  
## [3,] "25008KT" "250" "08" NA  
## [4,] "24009KT" "240" "09" NA  
## [5,] "25006KT" "250" "06" NA  
## [6,] "25007KT" "250" "07" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  512  183  170  247  241  210  181  170  134  197  142   77   74  116  168  157 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  181  269  307  333  365  337  219  280  318  344  369  342  330  282  240  196 
##  320  330  340  350  360  VRB <NA> 
##  206  216  196  197  171  137    1 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  512  531  635  741  821  817  775  775  624  519  473  424  316  233  172  127 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   36   37 <NA> 
##   93   64   53   35   22   17   15    6    5    1    4    1    1    1    1    1 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    5   11   40   44   80  119  109  132  128   97   88   84   71   46   36   30 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G44  G45 
##   27   18   19   15    9    5    3    3    3    2    3    1    1    1    2    2 
##  G46  G52  G54 <NA> 
##    2    1    1 7577 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 1 x 1
##   metar                                                                    
##   <chr>                                                                    
## 1 KORD 061851Z 19012G19 10SM BKN047 BKN250 32/23 A3003 RMK SLP163 T03220228

## Warning: Removed 1 rows containing non-finite values (stat_count).

## Warning: Removed 2 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    10  8805 
## 
## *** Data Not Matched *** 
##  [1] "KORD 100151Z 34014KT 4SM -SN BR BKN008 BKN016 OVC022 00/M01 A2976 RMK T00001011"
##  [2] "KORD 100251Z 34016KT 3SM -SN BR OVC011 M01/M02 A2975 RMK T10111022"             
##  [3] "KORD 071551Z 00000KT 10SM SCT030 SCT060 BKN250 27/15 A3007"                     
##  [4] "KORD 071651Z 13005KT 10SM FEW030 BKN060 BKN250 28/17 A3007"                     
##  [5] "KORD 071751Z 10005KT 10SM SCT048 BKN220 BKN250 30/16 3006"                      
##  [6] "KORD 071851Z 10009KT 10SM SCT050 BKN230 BKN250 31/14 A3005"                     
##  [7] "KORD 061851Z 19012G19 10SM BKN047 BKN250 32/23 A3003 RMK SLP163 T03220228"      
##  [8] "KORD 261151Z 00000KT 10SM OVC029 02/M01 A3034"                                  
##  [9] "KORD 280351Z 18012KT 10SM BKN075 OVC100 06/02 A2976"                            
## [10] "KORD 210451Z 25009KT 10SM BKN250 M04/M08 A3014"                                 
## 
## *** Parsing matrix summary *** 
## [1] 8815   11
##      [,1]                                                                  
## [1,] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056"       
## [2,] "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056"       
## [3,] "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061"      
## [4,] "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061"      
## [5,] "51Z 25006KT 10SM OVC017 M03/M06 A3021 RMK AO2 SLP239 T10281061"      
## [6,] "51Z 25007KT 10SM OVC017 M03/M06 A3020 RMK AO2 SLP237 4/002 T10281061"
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "260" "08" NA   " "  "8SM"  "M03" "M06" "A3021" "SLP239" "T10281056"
## [2,] "260" "08" NA   " "  "9SM"  "M03" "M06" "A3021" "SLP240" "T10281056"
## [3,] "250" "08" NA   " "  "10SM" "M03" "M06" "A3021" "SLP239" "T10281061"
## [4,] "240" "09" NA   " "  "10SM" "M03" "M06" "A3022" "SLP242" "T10281061"
## [5,] "250" "06" NA   " "  "10SM" "M03" "M06" "A3021" "SLP239" "T10281061"
## [6,] "250" "07" NA   " "  "10SM" "M03" "M06" "A3020" "SLP237" "T10281061"
## 
## *** Summary of the parsed data *** 
## Observations: 8,815
## Variables: 13
## $ METAR      <chr> "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10...
## $ WindDir    <chr> "260", "260", "250", "240", "250", "250", "270", "260", ...
## $ WindSpeed  <chr> "08", "08", "08", "09", "06", "07", "10", "11", "08", "1...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "8SM", "9SM", "10SM", "10SM", "10SM", "10SM", "10SM", "1...
## $ TempC      <chr> "M03", "M03", "M03", "M03", "M03", "M03", "M03", "M03", ...
## $ DewC       <chr> "M06", "M06", "M06", "M06", "M06", "M06", "M06", "M06", ...
## $ Altimeter  <chr> "A3021", "A3021", "A3021", "A3022", "A3021", "A3020", "A...
## $ SLP        <chr> "SLP239", "SLP240", "SLP239", "SLP242", "SLP239", "SLP23...
## $ FahrC      <chr> "T10281056", "T10281056", "T10281061", "T10281061", "T10...
## $ dtime      <dttm> 2015-12-31 00:51:00, 2015-12-31 01:51:00, 2015-12-31 02...
## $ origMETAR  <chr> "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 S...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  15 variables:
##  $ METAR     : chr  "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ WindDir   : chr  "260" "260" "250" "240" ...
##  $ WindSpeed : int  8 8 8 9 6 7 10 11 8 10 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  8 9 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##  $ DewC      : int  -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##  $ Altimeter : int  3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##  $ SLP       : int  239 240 239 242 239 237 237 242 244 239 ...
##  $ FahrC     : chr  "T10281056" "T10281056" "T10281061" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ TempF     : num  27 27 27 27 27 ...
##  $ DewF      : num  21.9 21.9 21 21 21 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 260             8       NA " "            8    -3    -6      3021   239
## 2 51Z ~ 260             8       NA " "            9    -3    -6      3021   240
## 3 51Z ~ 250             8       NA " "           10    -3    -6      3021   239
## 4 51Z ~ 240             9       NA " "           10    -3    -6      3022   242
## 5 51Z ~ 250             6       NA " "           10    -3    -6      3021   239
## 6 51Z ~ 250             7       NA " "           10    -3    -6      3020   237
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 200             8       NA " "           10     3    -7      3010   202
## 2 51Z ~ 230             8       NA " "           10     3    -7      3011   204
## 3 51Z ~ 200             6       NA " "           10     4    -7      3011   203
## 4 51Z ~ 000             0       NA " "           10     3    -8      3009   199
## 5 51Z ~ 000             0       NA " "           10     1    -7      3012   208
## 6 51Z ~ 000             0       NA " "           10     1    -7      3014   217
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 51Z ~ 090             9       NA " "           10    21    19      2982    93
##  2 51Z ~ 230             7       NA " "           10    27     9      2982    97
##  3 51Z ~ 260            12       NA " "           10    -4    -8      3001   169
##  4 51Z ~ 180             8       NA " "           10    16    11      2979    83
##  5 51Z ~ 260            18       NA " "           10    -9   -15      3011   211
##  6 51Z ~ 040             7       NA " "            4    20    19      2987   111
##  7 51Z ~ 200            12       NA " "           10    26    16      3018   216
##  8 51Z ~ 250             4       NA " "           10    -3    -7      3014   215
##  9 51Z ~ 260             6       NA " "           10    19    17      2983    96
## 10 51Z ~ 140             8       NA " "           10    19    14      3009   188
## 11 51Z ~ 040             4       NA " "           10    18    14      3024   236
## 12 51Z ~ 050             8       NA " "           10    16     7      3020   227
## 13 51Z ~ 290            10       NA " "           10     1    -4      3029   263
## 14 51Z ~ 340             6       NA " "            8    15    14      2999   156
## 15 51Z ~ 290            10       NA " "           10   -18   -23      3036   298
## 16 51Z ~ 260             8       NA " "           10    18    16      2964    33
## 17 51Z ~ 030             8       NA " "           10     7     3      3003   172
## 18 51Z ~ 130             8       NA " 1 ~          2     3     3      2981   100
## 19 51Z ~ 260             7       NA " "           10    -3    -7      3034   281
## 20 51Z ~ 170             8       NA " "           10    21    14      2987   110
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         10         10         10       8815         10         10         10 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         10         10         10         10          0          0         10 
##       DewF 
##         10

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 20 x 2
##    Visibility     n
##         <dbl> <int>
##  1      0         3
##  2      0.125     8
##  3      0.25     22
##  4      0.5      21
##  5      0.75     19
##  6      1        27
##  7      1.25     22
##  8      1.5      34
##  9      1.75     30
## 10      2        64
## 11      2.5      54
## 12      3        99
## 13      4       108
## 14      5       168
## 15      6       169
## 16      7       179
## 17      8       184
## 18      9       284
## 19     10      7310
## 20     NA        10
##    WindGust    n
## 1        14    5
## 2        15   10
## 3        16   39
## 4        17   44
## 5        18   80
## 6        19  119
## 7        20  109
## 8        21  132
## 9        22  127
## 10       23   97
## 11       24   88
## 12       25   84
## 13       26   71
## 14       27   46
## 15       28   36
## 16       29   30
## 17       30   27
## 18       31   18
## 19       32   19
## 20       33   15
## 21       34    9
## 22       35    5
## 23       36    3
## 24       37    3
## 25       38    3
## 26       39    2
## 27       40    3
## 28       41    1
## 29       42    1
## 30       43    1
## 31       44    2
## 32       45    2
## 33       46    2
## 34       52    1
## 35       54    1
## 36       NA 7580
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8805 complete cases (99.9% of 8815 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.93  0.93     -0.22  -0.29     -0.12       0.19
## TempF       1.00  1.00  0.93  0.93     -0.22  -0.29     -0.12       0.19
## DewC        0.93  0.93  1.00  1.00     -0.27  -0.34     -0.19       0.05
## DewF        0.93  0.93  1.00  1.00     -0.28  -0.34     -0.19       0.05
## Altimeter  -0.22 -0.22 -0.27 -0.28      1.00   1.00     -0.31       0.19
## modSLP     -0.29 -0.29 -0.34 -0.34      1.00   1.00     -0.29       0.17
## WindSpeed  -0.12 -0.12 -0.19 -0.19     -0.31  -0.29      1.00       0.01
## Visibility  0.19  0.19  0.05  0.05      0.19   0.17      0.01       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.31041 -0.47561 -0.07852  0.43888  1.66927 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.312e+01  8.598e-01  -26.89   <2e-16 ***
## Altimeter    3.464e-01  2.864e-04 1209.37   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5573 on 8803 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.994,  Adjusted R-squared:  0.994 
## F-statistic: 1.463e+06 on 1 and 8803 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69460 -0.12269  0.00055  0.12142  0.73628 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.603e+00  2.864e-01  -16.07   <2e-16 ***
## Altimeter    3.407e-01  9.502e-05 3585.30   <2e-16 ***
## TempF       -2.606e-02  9.503e-05 -274.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1804 on 8802 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9994 
## F-statistic: 7.018e+06 on 2 and 8802 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      0    1
## 2      0    0      0      0      0      1 1404
## 3      0    0      0      0      1      0  515
## 4      0    0      0      0      1      1  551
## 5      0    0      0      0      2      0  236
## 6      0    0      0      0      2      1  142
## 7      0    0      0      0      3      0   49
## 8      0    0      0      0      3      1    9
## 9      0    0      0      0      4      0    4
## 10     0    0      0      1      0      0  381
## 11     0    0      0      1      0      1  144
## 12     0    0      0      1      1      0  234
## 13     0    0      0      1      1      1  193
## 14     0    0      0      1      2      0  114
## 15     0    0      0      1      2      1   21
## 16     0    0      0      1      3      0   21
## 17     0    0      0      1      3      1    1
## 18     0    0      0      2      0      0  173
## 19     0    0      0      2      0      1   16
## 20     0    0      0      2      1      0   61
## 21     0    0      0      2      1      1   14
## 22     0    0      0      2      2      0   14
## 23     0    0      0      3      0      0   17
## 24     0    0      1      0      0      0 1050
## 25     0    0      1      0      0      1  240
## 26     0    0      1      0      1      0  359
## 27     0    0      1      0      1      1  217
## 28     0    0      1      0      2      0  130
## 29     0    0      1      0      2      1   19
## 30     0    0      1      0      3      0   14
## 31     0    0      1      1      0      0  323
## 32     0    0      1      1      0      1  105
## 33     0    0      1      1      1      0  226
## 34     0    0      1      1      1      1   39
## 35     0    0      1      1      2      0   54
## 36     0    0      1      2      0      0   65
## 37     0    0      1      2      0      1    3
## 38     0    0      1      2      1      0   17
## 39     0    0      1      2      1      1    1
## 40     0    0      1      2      2      0    2
## 41     0    0      1      3      0      0    2
## 42     0    0      2      0      0      0  431
## 43     0    0      2      0      0      1   37
## 44     0    0      2      0      1      0  109
## 45     0    0      2      0      1      1   17
## 46     0    0      2      0      2      0    9
## 47     0    0      2      1      0      0   96
## 48     0    0      2      1      0      1    6
## 49     0    0      2      1      1      0   42
## 50     0    0      2      1      1      1    4
## 51     0    0      2      1      2      0    1
## 52     0    0      2      2      0      0    9
## 53     0    0      3      0      0      0    5
## 54     0    0      4      0      0      0    1
## 55     0    1      0      0      0      0   55
## 56     1    0      0      0      0      0  812
## 
## *** METAR records where no clouds were extracted ***
## [1] "KORD 071951Z AUTO 07005KT 10SM 28/14 A3004 RMK AO2 SLP169 T02780139 $"

## 
## *** Dimensions for the cloud matrix ***
## [1] 8815   14
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8815 obs. of  7 variables:
##   ..$ isCLR : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV  : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8815] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ numOVC: int [1:8815] 1 1 1 1 1 1 1 1 1 0 ...
##  $ mtxCloud : chr [1:8815, 1:14] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,815 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   1600     NA     NA     NA
##  2    NA   1600     NA     NA     NA
##  3    NA   1500     NA     NA     NA
##  4    NA   1900     NA     NA     NA
##  5    NA   1700     NA     NA     NA
##  6    NA   1700     NA     NA     NA
##  7    NA   1600     NA     NA     NA
##  8    NA   1500     NA     NA     NA
##  9    NA   2000     NA     NA     NA
## 10    NA     NA   1600     NA     NA
## # ... with 8,805 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  5 variables:
##   ..$ lowVV : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##   ..$ lowBKN: num [1:8815] NA NA NA NA NA NA NA NA NA 1600 ...
##   ..$ lowSCT: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##  $ minCeilingLevel: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##  $ minCloudLevel  : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  30 variables:
##  $ METAR     : chr  "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ WindDir   : chr  "260" "260" "250" "240" ...
##  $ WindSpeed : int  8 8 8 9 6 7 10 11 8 10 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  8 9 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##  $ DewC      : int  -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##  $ Altimeter : int  3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##  $ SLP       : int  239 240 239 242 239 237 237 242 244 239 ...
##  $ FahrC     : chr  "T10281056" "T10281056" "T10281061" "T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ TempF     : num  27 27 27 27 27 ...
##  $ DewF      : num  21.9 21.9 21 21 21 ...
##  $ modSLP    : num  1024 1024 1024 1024 1024 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numSCT    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 1 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA 1600 ...
##  $ lowSCT    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 2 3 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(kord2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  30 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : int [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ WindGust  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8815] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##   ..$ DewC      : int [1:8815] -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8815] 3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##   ..$ SLP       : int [1:8815] 239 240 239 242 239 237 237 242 244 239 ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ TempF     : num [1:8815] 27 27 27 27 27 ...
##   ..$ DewF      : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ modSLP    : num [1:8815] 1024 1024 1024 1024 1024 ...
##   ..$ isCLR     : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV      : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT    : int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8815] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ numOVC    : int [1:8815] 1 1 1 1 1 1 1 1 1 0 ...
##   ..$ lowVV     : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##   ..$ lowBKN    : num [1:8815] NA NA NA NA NA NA NA NA NA 1600 ...
##   ..$ lowSCT    : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 2 3 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  29 variables:
##   ..$ station          : chr [1:8815] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8815] 27 27 27 27 27 ...
##   ..$ dwpf             : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ relh             : num [1:8815] 81 81 78 78 78 ...
##   ..$ drct             : num [1:8815] 260 260 250 240 250 250 270 260 250 250 ...
##   ..$ sknt             : num [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ p01i             : chr [1:8815] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8815] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8815] 1024 1024 1024 1024 1024 ...
##   ..$ vsby             : num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8815] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8815] NA NA NA NA ...
##   ..$ skyl1            : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##   ..$ skyl2            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8815] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8815], format: NA NA ...
##   ..$ feel             : num [1:8815] 17.9 17.9 17.9 17.2 19.5 ...
##   ..$ metar            : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..- attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame':  22 obs. of  5 variables:
##   .. ..$ row     : int [1:22] 1678 1680 1902 1903 1903 2919 2921 2922 2929 2934 ...
##   .. ..$ col     : chr [1:22] "ice_accretion_1hr" "ice_accretion_6hr" "ice_accretion_1hr" "ice_accretion_1hr" ...
##   .. ..$ expected: chr [1:22] "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
##   .. ..$ actual  : chr [1:22] "0.01" "0.01" "0.01" "0.01" ...
##   .. ..$ file    : chr [1:22] "'./RInputFiles/metar_kord_2016.txt'" "'./RInputFiles/metar_kord_2016.txt'" "'./RInputFiles/metar_kord_2016.txt'" "'./RInputFiles/metar_kord_2016.txt'" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_character(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_double(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_logical(),
##   .. ..   ice_accretion_3hr = col_logical(),
##   .. ..   ice_accretion_6hr = col_logical(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  32 variables:
##   ..$ station          : chr [1:8815] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8815] 27 27 27 27 27 ...
##   ..$ dwpf             : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ relh             : num [1:8815] 81 81 78 78 78 ...
##   ..$ drct             : num [1:8815] 260 260 250 240 250 250 270 260 250 250 ...
##   ..$ sknt             : num [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ p01i             : chr [1:8815] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8815] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8815] 1024 1024 1024 1024 1024 ...
##   ..$ vsby             : num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8815] "OVC" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8815] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8815] NA NA NA NA ...
##   ..$ skyl1            : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##   ..$ skyl2            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8815] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8815] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8815], format: NA NA ...
##   ..$ feel             : num [1:8815] 17.9 17.9 17.9 17.2 19.5 ...
##   ..$ metar            : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ dirW             : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ spdW             : num [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ gustW            : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  13 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : chr [1:8815] "08" "08" "08" "09" ...
##   ..$ WindGust  : chr [1:8815] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: chr [1:8815] "8SM" "9SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8815] "M03" "M03" "M03" "M03" ...
##   ..$ DewC      : chr [1:8815] "M06" "M06" "M06" "M06" ...
##   ..$ Altimeter : chr [1:8815] "A3021" "A3021" "A3021" "A3022" ...
##   ..$ SLP       : chr [1:8815] "SLP239" "SLP240" "SLP239" "SLP242" ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  15 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : int [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ WindGust  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8815] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##   ..$ DewC      : int [1:8815] -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8815] 3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##   ..$ SLP       : int [1:8815] 239 240 239 242 239 237 237 242 244 239 ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ TempF     : num [1:8815] 27 27 27 27 27 ...
##   ..$ DewF      : num [1:8815] 21.9 21.9 21 21 21 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8815 obs. of  16 variables:
##   ..$ METAR     : chr [1:8815] "51Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "51Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "51Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061" "51Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ WindDir   : chr [1:8815] "260" "260" "250" "240" ...
##   ..$ WindSpeed : int [1:8815] 8 8 8 9 6 7 10 11 8 10 ...
##   ..$ WindGust  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8815] " " " " " " " " ...
##   ..$ Visibility: num [1:8815] 8 9 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8815] -3 -3 -3 -3 -3 -3 -3 -3 -3 -3 ...
##   ..$ DewC      : int [1:8815] -6 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8815] 3021 3021 3021 3022 3021 3020 3021 3022 3022 3021 ...
##   ..$ SLP       : int [1:8815] 239 240 239 242 239 237 237 242 244 239 ...
##   ..$ FahrC     : chr [1:8815] "T10281056" "T10281056" "T10281061" "T10281061" ...
##   ..$ dtime     : POSIXct[1:8815], format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8815] "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##   ..$ TempF     : num [1:8815] 27 27 27 27 27 ...
##   ..$ DewF      : num [1:8815] 21.9 21.9 21 21 21 ...
##   ..$ modSLP    : num [1:8815] 1024 1024 1024 1024 1024 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ isVV  : num [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8815] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8815] 0 0 0 0 0 0 0 0 0 1 ...
##   .. ..$ numOVC: int [1:8815] 1 1 1 1 1 1 1 1 1 0 ...
##   ..$ mtxCloud : chr [1:8815, 1:14] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8815 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 NA ...
##   .. ..$ lowBKN: num [1:8815] NA NA NA NA NA NA NA NA NA 1600 ...
##   .. ..$ lowSCT: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8815] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...
##   ..$ minCloudLevel  : num [1:8815] 1600 1600 1500 1900 1700 1700 1600 1500 2000 1600 ...

Example #24: Downloading the Data by Function

The Iowa State Mesonet website provides instructions for automating the download process using Python or R, including links to a GitHub repository with instructions for R download. The code can be adapted for use here.

Example code includes:

# Function to get ASOS data from Iowa State
getASOSData <- function(faaID, startDate, endDate, suffix,
                        dirDownload = "./RInputFiles/", getAgain=FALSE) {

    # Create the file name and location for saving data
    fileName <- paste0("metar_k", str_to_lower(faaID), "_", suffix, ".txt")
    fileLoc <- paste0(dirDownload, fileName)

    # Check whether the file already exists, stop if so and getAgain is FALSE, otherwise download the data
    if (file.exists(fileLoc) & !getAgain) {
        cat("\nFile already exists - ", fileLoc)
        cat("\nStopping download routine\n")
        return(FALSE)
    } else {
        # Get the year, day, and hour of the key dates
        y1 <- lubridate::year(startDate)
        m1 <- lubridate::month(startDate)
        d1 <- lubridate::day(startDate)
    
        y2 <- lubridate::year(endDate)
        m2 <- lubridate::month(endDate)
        d2 <- lubridate::day(endDate)

        # Mimic the string shown below
        # https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=LAS&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2
    
        baseURL <- "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"  # base URL
        useURL <- paste0(baseURL, "station=", faaID)  # add the desired station
        useURL <- paste0(useURL, "&data=all")  # request all data fields
        useURL <- paste0(useURL, "&year1=", y1, "&month1=", m1, "&day1=", d1)  # Specify start ymd
        useURL <- paste0(useURL, "&year2=", y2, "&month2=", m2, "&day2=", d2)  # Specify end ymd
        useURL <- paste0(useURL, "&tz=Etc%2FUTC&format=onlycomma&latlon=no")  # Formatting
        useURL <- paste0(useURL, "&missing=M&trace=T&direct=no&report_type=2")  # Formatting
    
        # Download the file
        download.file(useURL, destfile=fileLoc, method="curl")
        
        return(TRUE)
    }
}

And then cache the actual download step to minimize utilization of the Iowa State server (though the code is set to not download to an existing file anyway):

# Specify the FAA ID and Analysis Year
useFAAID <- "LAS"
analysisYear <- 2016

# Specify the start and end dates based on the analysis year
startDate <- ISOdate(analysisYear-1, 12, 31, hour=0)
endDate <- ISOdate(analysisYear+1, 1, 2, hour=0)

# Get the relevant data
getASOSData(faaID=useFAAID, startDate=startDate, endDate=endDate, suffix=analysisYear)
## [1] TRUE

And then explore the data to set the key parameters for a full run of the METAR process:

# Set key parameters for reading and interpreting METAR based on above inputs
fname <- paste0("./RInputFiles/metar_k", str_to_lower(useFAAID), "_", analysisYear, ".txt")

# Find the most common Zulu time (this will be the METAR)
zTimes <- readr::read_csv(fname) %>%
    pull(metar) %>%
    str_match(pattern="\\d{2}Z") %>%
    as.vector() %>%
    table() %>%
    sort(decreasing=TRUE)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   valid = col_datetime(format = ""),
##   tmpf = col_double(),
##   dwpf = col_double(),
##   relh = col_double(),
##   sknt = col_double(),
##   alti = col_double(),
##   vsby = col_double(),
##   feel = col_double()
## )
## See spec(...) for full column specifications.
## Warning: 64 parsing failures.
##  row  col expected actual                                file
## 1585 relh a double      M './RInputFiles/metar_klas_2016.txt'
## 1585 sknt a double      M './RInputFiles/metar_klas_2016.txt'
## 1585 feel a double      M './RInputFiles/metar_klas_2016.txt'
## 1869 relh a double      M './RInputFiles/metar_klas_2016.txt'
## 1869 sknt a double      M './RInputFiles/metar_klas_2016.txt'
## .... .... ........ ...... ...................................
## See problems(...) for more details.
cat("\nThe most common Zulu time is", names(zTimes)[1], "\nFrequency is",
    round(100*zTimes[1]/sum(zTimes), 1), "% (", zTimes[1], "of", sum(zTimes), ")"
    )
## 
## The most common Zulu time is 56Z 
## Frequency is 97.7 % ( 8818 of 9027 )
# Use the most common Zulu time and start/end dates to set key parameters
timeZ <- names(zTimes)[1]  # Zulu time that METAR is recorded at this station
expMin <- ISOdate(analysisYear-1, 12, 31, hour=0, min=as.integer(str_replace(timeZ, "Z", "")))
expDays <- as.integer(endDate - startDate)  # Expected total days read

# Provide a descriptive name
locMET <- "Las Vegas, NV"  # Description of city or location
shortMET <- "KLAS METAR (2016)"  # Station code and timing
longMET <- "Las Vegas, NV Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Lincoln, NE
klas2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_character(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_logical(),
##   ice_accretion_3hr = col_logical(),
##   ice_accretion_6hr = col_logical(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 9027 obs. of  29 variables:
##  $ station          : chr  "LAS" "LAS" "LAS" "LAS" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ tmpf             : num  46.9 44.1 43 41 39.9 ...
##  $ dwpf             : num  18 17.1 16 16 17.1 ...
##  $ relh             : num  31.1 33.4 33.2 35.9 39.2 ...
##  $ drct             : num  100 10 10 340 210 230 230 0 0 210 ...
##  $ sknt             : num  3 5 5 5 3 3 3 0 0 3 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.1 30.1 30.1 30.1 30.1 ...
##  $ mslp             : num  1020 1020 1021 1022 1022 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "CLR" "CLR" "CLR" "CLR" ...
##  $ skyc2            : chr  NA NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : chr  NA NA NA NA ...
##  $ skyl1            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl2            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_3hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_6hr: logi  NA NA NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  45.8 40.7 39.4 37.1 37.7 ...
##  $ metar            : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-06-17 23:56:00 UTC" "2016-06-18 00:56:00 UTC"
##  [3] "2016-07-13 14:56:00 UTC" "2016-07-13 15:56:00 UTC"
##  [5] "2016-07-13 16:56:00 UTC" "2016-07-13 17:56:00 UTC"
##  [7] "2016-08-05 07:56:00 UTC" "2016-08-12 17:56:00 UTC"
##  [9] "2016-08-23 14:56:00 UTC" "2016-08-24 00:56:00 UTC"
## [11] "2016-08-24 01:56:00 UTC" "2016-11-21 00:56:00 UTC"
## [13] "2016-12-03 09:56:00 UTC" "2016-12-03 10:56:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "10003KT" "100" "03" NA  
## [2,] "01005KT" "010" "05" NA  
## [3,] "01005KT" "010" "05" NA  
## [4,] "34005KT" "340" "05" NA  
## [5,] "21003KT" "210" "03" NA  
## [6,] "23003KT" "230" "03" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
## 1397  152  172  161  162  196  164  167  172  119  101   79   51   49   70  100 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  186  297  436  629  594  543  512  348  200  135  118  118  112   81   51   59 
##  320  330  340  350  360  VRB <NA> 
##   76  145  126  113  104  504   19 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
## 1397 1349 1247  963  727  500  474  375  386  301  244  192  161  121   88   58 
##   18   19   20   21   22   23   24   25   26   27   28   29   31 <NA> 
##   67   35   41   28   14   12    7    3    2    2    3    1    1   19 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##   23   80   94  146  128  130  116  106  111   77   82   73   53   38   28   28 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G44  G45 <NA> 
##   23   20    9   11   13    6   10    3    1    5    1    1    2    2 7398 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 19 x 1
##    metar                                                                        
##    <chr>                                                                        
##  1 KLAS 042156Z 10SM FEW200 BKN250 26/M09 A2988 RMK AO2 SLP100 T02561089        
##  2 KLAS 161456Z 10SM FEW250 14/M06 A3010 RMK AO2 SLP180 T01441061 51021         
##  3 KLAS 022256Z 10SM FEW090 SCT250 26/M01 A2997 RMK AO2 SLP134 T02611006        
##  4 KLAS 032056Z 10SM FEW250 28/01 A3000 RMK AO2 SLP138 T02780006 57020          
##  5 KLAS 132156Z 10SM FEW120 36/M03 A2989 RMK AO2 SLP095 T03611033               
##  6 KLAS 151756Z 10SM FEW250 27/M01 A2970 RMK AO2 SLP030 T02671006 10272 20217 5~
##  7 KLAS 182156Z 10SM SCT130 SCT200 29/02 A2987 RMK AO2 SLP093 T02940022         
##  8 KLAS 282056Z 10SM SCT120 BKN250 32/M03 A2979 RMK AO2 SLP061 T03171028 56016  
##  9 KLAS 131856Z 10SM FEW120 32/01 A2993 RMK AO2 SLP108 T03170011                
## 10 KLAS 221756Z 10SM FEW180 FEW250 38/M01 A2984 RMK AO2 SLP069 T03781006 10383 ~
## 11 KLAS 281856Z 10SM FEW095 SCT130 SCT200 40/04 A2995 RMK AO2 SLP106 T04000044  
## 12 KLAS 012256Z 10SM FEW100 FEW160 SCT250 35/13 A2981 RMK AO2 SLP068 TCU SW AND~
## 13 KLAS 241656Z 9SM CLR 38/02 A2986 RMK AO2 SLP077 FU ALQDS VIS LWR S T03780022 
## 14 KLAS 011956Z 10SM SCT120 SCT250 37/16 A2990 RMK AO2 SLP095 TCU DSNT N AND W ~
## 15 KLAS 061856Z 10SM FEW100 FEW140 36/09 A2982 RMK AO2 SLP066 T03610094         
## 16 KLAS 112156Z 10SM FEW110 FEW200 37/00 A2992 RMK AO2 SLP100 CB DSNT SE ACC DS~
## 17 KLAS 222256Z 10SM SCT120 SCT200 36/04 A2978 RMK AO2 SLP055 CB DSNT SE ACC DS~
## 18 KLAS 092156Z 10SM FEW120 37/M01 A2993 RMK AO2 SLP106 T03721006               
## 19 KLAS 130456Z 10SM CLR 29/01 A2968 RMK AO2 SLP023 T02890006

## Warning: Removed 19 rows containing non-finite values (stat_count).

## Warning: Removed 1 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    35  8783 
## 
## *** Data Not Matched *** 
##  [1] "KLAS 042156Z 10SM FEW200 BKN250 26/M09 A2988 RMK AO2 SLP100 T02561089"                                                                          
##  [2] "KLAS 161456Z 10SM FEW250 14/M06 A3010 RMK AO2 SLP180 T01441061 51021"                                                                           
##  [3] "KLAS 022256Z 10SM FEW090 SCT250 26/M01 A2997 RMK AO2 SLP134 T02611006"                                                                          
##  [4] "KLAS 032056Z 10SM FEW250 28/01 A3000 RMK AO2 SLP138 T02780006 57020"                                                                            
##  [5] "KLAS 132156Z 10SM FEW120 36/M03 A2989 RMK AO2 SLP095 T03611033"                                                                                 
##  [6] "KLAS 151756Z 10SM FEW250 27/M01 A2970 RMK AO2 SLP030 T02671006 10272 20217 51002"                                                               
##  [7] "KLAS 182156Z 10SM SCT130 SCT200 29/02 A2987 RMK AO2 SLP093 T02940022"                                                                           
##  [8] "KLAS 282056Z 10SM SCT120 BKN250 32/M03 A2979 RMK AO2 SLP061 T03171028 56016"                                                                    
##  [9] "KLAS 131856Z 10SM FEW120 32/01 A2993 RMK AO2 SLP108 T03170011"                                                                                  
## [10] "KLAS 221756Z 10SM FEW180 FEW250 38/M01 A2984 RMK AO2 SLP069 T03781006 10383 20306 50003"                                                        
## [11] "KLAS 281856Z 10SM FEW095 SCT130 SCT200 40/04 A2995 RMK AO2 SLP106 T04000044"                                                                    
## [12] "KLAS 012256Z 10SM FEW100 FEW160 SCT250 35/13 A2981 RMK AO2 SLP068 TCU SW AND DSNT S T03500133"                                                  
## [13] "KLAS 241656Z 9SM CLR 38/02 A2986 RMK AO2 SLP077 FU ALQDS VIS LWR S T03780022"                                                                   
## [14] "KLAS 011956Z 10SM SCT120 SCT250 37/16 A2990 RMK AO2 SLP095 TCU DSNT N AND W AND NW ACC DSNT S-SW T03670156"                                     
## [15] "KLAS 061856Z 10SM FEW100 FEW140 36/09 A2982 RMK AO2 SLP066 T03610094"                                                                           
## [16] "KLAS 112156Z 10SM FEW110 FEW200 37/00 A2992 RMK AO2 SLP100 CB DSNT SE ACC DSNT E T03720000"                                                     
## [17] "KLAS 222256Z 10SM SCT120 SCT200 36/04 A2978 RMK AO2 SLP055 CB DSNT SE ACC DSNT ALQDS T03610044"                                                 
## [18] "KLAS 231556Z 00000KT 10SM FEW150 SCT180 26/14 A3005 RMK SLPNO T02570144"                                                                        
## [19] "KLAS 231656Z 10003KT 10SM FEW180 28/14 A3006 RMK SLPNO T02790143"                                                                               
## [20] "KLAS 231756Z 13004KT 10SM FEW100 FEW180 27/14 A3006 RMK SLPNO T02730139"                                                                        
## [21] "KLAS 231856Z 00000KT 10SM FEW100 30/13 A3004 RMK SLPNO T03010127"                                                                               
## [22] "KLAS 231956Z 00000KT 10SM FEW100 31/13 A3002 RMK SLPNO T03130131"                                                                               
## [23] "KLAS 232056Z 03004KT 10SM FEW100 31/13 A2999 RMK SLPNO CB DSNT NE T03080131"                                                                    
## [24] "KLAS 232156Z 10005KT 10SM FEW100 32/13 A2996 RMK SLPNO TCU DSNT N AND NW T03160132"                                                             
## [25] "KLAS 232256Z 08005KT 10SM FEW100 32/13 A2994 RMK SLPNO TCU DSNT W-NW T03210134"                                                                 
## [26] "KLAS 232356Z 05006KT 10SM FEW100 FEW250 32/14 A2992 RMK SLPNO ACC W T03240136"                                                                  
## [27] "KLAS 240256Z 01007KT 10SM FEW100 FEW250 32/11 A2994 RMK AO2 SLPNO CB DSNT N T03220106 53004 $"                                                  
## [28] "KLAS 240356Z 05011KT 10SM FEW100 FEW250 32/11 A2997 RMK AO2 SLPNO T03170106 $"                                                                  
## [29] "KLAS 240456Z 02007KT 10SM SCT100 SCT250 31/12 A2998 RMK AO2 SLPNO T03060122 $"                                                                  
## [30] "KLAS 240556Z 34007KT 10SM SCT120 BKN160 BKN250 31/11 A3000 RMK AO2 SLPNO OCNL LTGIC DSNT NW T030600111 10339 20300 51018 $"                     
## [31] "KLAS 240656Z VRB03KT 10SM -TSRA FEW030 SCT095CB BKN150 27/13 A3001 RMK AO2 RAB40 TSB38 SLPNO OCNL LTGICCG W-N TS W-N MOV S P0003 T02720133 $"   
## [32] "KLAS 240756Z 26010KT 10SM TS FEW050 FEW110CB BKN150 27/13 A2999 RMK AO2 RAE29 SLPNO OCNL LTGICCG S-SW TS S-SW MOV S P0000 T02720133 403390267 $"
## [33] "KLAS 092156Z 10SM FEW120 37/M01 A2993 RMK AO2 SLP106 T03721006"                                                                                 
## [34] "KLAS 130456Z 10SM CLR 29/01 A2968 RMK AO2 SLP023 T02890006"                                                                                     
## [35] "KLAS 071456Z 02010KT 10SM BKN250 07/M13 A3011"                                                                                                  
## 
## *** Parsing matrix summary *** 
## [1] 8818   11
##      [,1]                                                         [,2]  [,3]
## [1,] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "100" "03"
## [2,] "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "010" "05"
## [3,] "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "010" "05"
## [4,] "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" "340" "05"
## [5,] "56Z 21003KT 10SM CLR 04/M08 A3015 RMK AO2 SLP216 T00441083" "210" "03"
## [6,] "56Z 23003KT 10SM CLR 03/M08 A3016 RMK AO2 SLP219 T00331083" "230" "03"
##      [,4] [,5] [,6]   [,7] [,8]  [,9]    [,10]    [,11]      
## [1,] NA   " "  "10SM" "08" "M08" "A3009" "SLP196" "T00831078"
## [2,] NA   " "  "10SM" "07" "M08" "A3011" "SLP204" "T00671083"
## [3,] NA   " "  "10SM" "06" "M09" "A3013" "SLP210" "T00611089"
## [4,] NA   " "  "10SM" "05" "M09" "A3014" "SLP215" "T00501089"
## [5,] NA   " "  "10SM" "04" "M08" "A3015" "SLP216" "T00441083"
## [6,] NA   " "  "10SM" "03" "M08" "A3016" "SLP219" "T00331083"
## 
## *** Summary of the parsed data *** 
## Observations: 8,818
## Variables: 13
## $ METAR      <chr> "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831...
## $ WindDir    <chr> "100", "010", "010", "340", "210", "230", "230", "000", ...
## $ WindSpeed  <chr> "03", "05", "05", "05", "03", "03", "03", "00", "00", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "08", "07", "06", "05", "04", "03", "02", "02", "01", "0...
## $ DewC       <chr> "M08", "M08", "M09", "M09", "M08", "M08", "M09", "M09", ...
## $ Altimeter  <chr> "A3009", "A3011", "A3013", "A3014", "A3015", "A3016", "A...
## $ SLP        <chr> "SLP196", "SLP204", "SLP210", "SLP215", "SLP216", "SLP21...
## $ FahrC      <chr> "T00831078", "T00671083", "T00611089", "T00501089", "T00...
## $ dtime      <dttm> 2015-12-31 00:56:00, 2015-12-31 01:56:00, 2015-12-31 02...
## $ origMETAR  <chr> "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP1...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  15 variables:
##  $ METAR     : chr  "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ WindDir   : chr  "100" "010" "010" "340" ...
##  $ WindSpeed : int  3 5 5 5 3 3 3 0 0 3 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  8 7 6 5 4 3 2 2 1 1 ...
##  $ DewC      : int  -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##  $ Altimeter : int  3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##  $ SLP       : int  196 204 210 215 216 219 221 222 226 229 ...
##  $ FahrC     : chr  "T00831078" "T00671083" "T00611089" "T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ TempF     : num  46.9 44.1 43 41 39.9 ...
##  $ DewF      : num  18 17.1 16 16 17.1 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 56Z ~ 100             3       NA " "           10     8    -8      3009   196
## 2 56Z ~ 010             5       NA " "           10     7    -8      3011   204
## 3 56Z ~ 010             5       NA " "           10     6    -9      3013   210
## 4 56Z ~ 340             5       NA " "           10     5    -9      3014   215
## 5 56Z ~ 210             3       NA " "           10     4    -8      3015   216
## 6 56Z ~ 230             3       NA " "           10     3    -8      3016   219
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 56Z ~ 000             0       NA " "           10    12     4      2979    84
## 2 56Z ~ 170             4       NA " "           10    13     3      2975    71
## 3 56Z ~ 150             6       NA " "           10    14     3      2973    62
## 4 56Z ~ VRB             5       NA " "           10    15     2      2970    50
## 5 56Z ~ 180            10       NA " "           10    15     2      2970    51
## 6 56Z ~ 180            11       NA " "           10    13     3      2971    56
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 56Z ~ 220            11       NA " "           10    41    -3      2971    25
##  2 56Z ~ 000             0       NA " "           10    19     2      2975    58
##  3 56Z ~ 290             4       NA " "           10    12    -8      2988   112
##  4 56Z ~ 220            13       NA " "           10    19    -4      2958   997
##  5 56Z ~ 010             3       NA " "           10    13     2      3011   192
##  6 56Z ~ 200             5       NA " "           10    31    -4      2981    61
##  7 56Z ~ 000             0       NA " "           10    22    14      3002   139
##  8 56Z ~ 000             0       NA " "           10    10    -9      3032   273
##  9 56Z ~ 260             4       NA " "           10    28     1      2997   117
## 10 56Z ~ 220             3       NA " "           10    28    -4      2993   111
## 11 56Z ~ 120             4       NA " "           10    28    -4      2996   126
## 12 <NA>  <NA>           NA       NA  <NA>         NA    NA    NA        NA    NA
## 13 56Z ~ 000             0       NA " "            9     8     4      3006   176
## 14 56Z ~ 230            10       NA " "           10    30    -1      2987    91
## 15 56Z ~ 000             0       NA " "           10    10     4      2999   149
## 16 56Z ~ 210             5       NA " "           10    32    -7      2978    51
## 17 56Z ~ 230             5       NA " "           10    16     6      2996   129
## 18 56Z ~ 200             4       NA " "           10     5     0      2982    97
## 19 56Z ~ 180             3       NA " "            6     8     7      2996   145
## 20 56Z ~ 200             3       NA " "           10    28    -1      2992    98
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         35         35         35       8818         35         35         35 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         35         35         35         35          0          0         35 
##       DewF 
##         35

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 9 x 2
##   Visibility     n
##        <dbl> <int>
## 1          3     4
## 2          4     8
## 3          5    11
## 4          6    13
## 5          7    16
## 6          8    26
## 7          9    42
## 8         10  8663
## 9         NA    35
##    WindGust    n
## 1        14   20
## 2        15   77
## 3        16   91
## 4        17  146
## 5        18  126
## 6        19  126
## 7        20  116
## 8        21  106
## 9        22  111
## 10       23   77
## 11       24   82
## 12       25   73
## 13       26   53
## 14       27   38
## 15       28   28
## 16       29   28
## 17       30   23
## 18       31   20
## 19       32    9
## 20       33   11
## 21       34   13
## 22       35    6
## 23       36   10
## 24       37    3
## 25       38    1
## 26       39    5
## 27       40    1
## 28       41    1
## 29       44    2
## 30       45    2
## 31       NA 7413
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8783 complete cases (99.6% of 8818 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.22  0.22     -0.51  -0.62      0.22       0.01
## TempF       1.00  1.00  0.22  0.22     -0.51  -0.62      0.22       0.01
## DewC        0.22  0.22  1.00  1.00     -0.24  -0.27     -0.04      -0.13
## DewF        0.22  0.22  1.00  1.00     -0.24  -0.27     -0.04      -0.13
## Altimeter  -0.51 -0.51 -0.24 -0.24      1.00   0.99     -0.38       0.06
## modSLP     -0.62 -0.62 -0.27 -0.27      0.99   1.00     -0.38       0.06
## WindSpeed   0.22  0.22 -0.04 -0.04     -0.38  -0.38      1.00      -0.02
## Visibility  0.01  0.01 -0.13 -0.13      0.06   0.06     -0.02       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1381 -0.7848 -0.0607  0.6891  3.6362 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.163e+02  1.730e+00  -67.24   <2e-16 ***
## Altimeter    3.769e-01  5.776e-04  652.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9753 on 8781 degrees of freedom
##   (35 observations deleted due to missingness)
## Multiple R-squared:  0.9798, Adjusted R-squared:  0.9798 
## F-statistic: 4.258e+05 on 1 and 8781 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.18161 -0.33113  0.02395  0.33395  1.08509 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.537e+01  8.693e-01  -29.18   <2e-16 ***
## Altimeter    3.478e-01  2.868e-04 1212.89   <2e-16 ***
## TempF       -5.581e-02  2.813e-04 -198.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4165 on 8780 degrees of freedom
##   (35 observations deleted due to missingness)
## Multiple R-squared:  0.9963, Adjusted R-squared:  0.9963 
## F-statistic: 1.187e+06 on 2 and 8780 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## No Records with a cloud type of vertical visibility (VV)
## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      1  144
## 2      0    0      0      0      1      0  635
## 3      0    0      0      0      1      1  107
## 4      0    0      0      0      2      0  139
## 5      0    0      0      0      2      1   39
## 6      0    0      0      0      3      0   13
## 7      0    0      0      0      4      0    1
## 8      0    0      0      1      0      0  590
## 9      0    0      0      1      0      1   54
## 10     0    0      0      1      1      0  242
## 11     0    0      0      1      1      1   51
## 12     0    0      0      1      2      0   88
## 13     0    0      0      1      2      1    1
## 14     0    0      0      1      3      0    1
## 15     0    0      0      2      0      0  134
## 16     0    0      0      2      0      1    7
## 17     0    0      0      2      1      0   37
## 18     0    0      0      2      2      0    4
## 19     0    0      0      3      0      0    4
## 20     0    0      0      3      1      0    2
## 21     0    0      1      0      0      0 1933
## 22     0    0      1      0      0      1   99
## 23     0    0      1      0      1      0  454
## 24     0    0      1      0      1      1   92
## 25     0    0      1      0      2      0  140
## 26     0    0      1      0      2      1    2
## 27     0    0      1      0      3      0    2
## 28     0    0      1      1      0      0  330
## 29     0    0      1      1      0      1   34
## 30     0    0      1      1      1      0  212
## 31     0    0      1      1      1      1   17
## 32     0    0      1      1      2      0   26
## 33     0    0      1      2      0      0   53
## 34     0    0      1      2      0      1    1
## 35     0    0      1      2      1      0    9
## 36     0    0      1      3      0      0    1
## 37     0    0      2      0      0      0  533
## 38     0    0      2      0      0      1   18
## 39     0    0      2      0      1      0   64
## 40     0    0      2      0      1      1    2
## 41     0    0      2      0      2      0    3
## 42     0    0      2      1      0      0   75
## 43     0    0      2      1      0      1    1
## 44     0    0      2      1      1      0    5
## 45     0    0      2      2      0      0    3
## 46     0    0      3      0      0      0    4
## 47     0    0      3      0      0      1    1
## 48     0    0      3      0      1      0    1
## 49     0    0      3      1      0      0    2
## 50     0    0      3      1      1      0    1
## 51     1    0      0      0      0      0 2407
## 
## *** METAR records where no clouds were extracted ***
## character(0)
## 
## *** Dimensions for the cloud matrix ***
## [1] 8818   12
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8818 obs. of  7 variables:
##   ..$ isCLR : num [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ isVV  : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##  $ mtxCloud : chr [1:8818, 1:12] "" "" "" "" ...
## Warning in min(isKey): no non-missing arguments to min; returning Inf
## Warning in stri_replace_first_regex(string, pattern,
## fix_replacement(replacement), : NAs introduced by coercion to integer range

## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,818 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA     NA     NA     NA     NA
##  2    NA     NA     NA     NA     NA
##  3    NA     NA     NA     NA     NA
##  4    NA     NA     NA     NA     NA
##  5    NA     NA     NA     NA     NA
##  6    NA     NA     NA     NA     NA
##  7    NA     NA     NA     NA     NA
##  8    NA     NA     NA     NA     NA
##  9    NA     NA     NA     NA     NA
## 10    NA     NA     NA     NA     NA
## # ... with 8,808 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  5 variables:
##   ..$ lowVV : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowBKN: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##  $ minCeilingLevel: num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...
##  $ minCloudLevel  : num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  30 variables:
##  $ METAR     : chr  "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ WindDir   : chr  "100" "010" "010" "340" ...
##  $ WindSpeed : int  3 5 5 5 3 3 3 0 0 3 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  8 7 6 5 4 3 2 2 1 1 ...
##  $ DewC      : int  -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##  $ Altimeter : int  3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##  $ SLP       : int  196 204 210 215 216 219 221 222 226 229 ...
##  $ FahrC     : chr  "T00831078" "T00671083" "T00611089" "T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ TempF     : num  46.9 44.1 43 41 39.9 ...
##  $ DewF      : num  18 17.1 16 16 17.1 ...
##  $ modSLP    : num  1020 1020 1021 1022 1022 ...
##  $ isCLR     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : logi  NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numSCT    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowBKN    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(klas2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  30 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : int [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 8 7 6 5 4 3 2 2 1 1 ...
##   ..$ DewC      : int [1:8818] -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##   ..$ Altimeter : int [1:8818] 3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##   ..$ SLP       : int [1:8818] 196 204 210 215 216 219 221 222 226 229 ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ TempF     : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ DewF      : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ modSLP    : num [1:8818] 1020 1020 1021 1022 1022 ...
##   ..$ isCLR     : num [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ isVV      : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ lowVV     : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowBKN    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 6 6 6 6 6 6 6 6 6 6 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  29 variables:
##   ..$ station          : chr [1:8818] "LAS" "LAS" "LAS" "LAS" ...
##   ..$ valid            : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ tmpf             : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ dwpf             : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ relh             : num [1:8818] 31.1 33.4 33.2 35.9 39.2 ...
##   ..$ drct             : num [1:8818] 100 10 10 340 210 230 230 0 0 210 ...
##   ..$ sknt             : num [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ p01i             : chr [1:8818] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8818] 30.1 30.1 30.1 30.1 30.1 ...
##   ..$ mslp             : num [1:8818] 1020 1020 1021 1022 1022 ...
##   ..$ vsby             : num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8818] "CLR" "CLR" "CLR" "CLR" ...
##   ..$ skyc2            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8818] NA NA NA NA ...
##   ..$ skyl1            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl2            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8818] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8818], format: NA NA ...
##   ..$ feel             : num [1:8818] 45.8 40.7 39.4 37.1 37.7 ...
##   ..$ metar            : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_character(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_double(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_logical(),
##   .. ..   ice_accretion_3hr = col_logical(),
##   .. ..   ice_accretion_6hr = col_logical(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  32 variables:
##   ..$ station          : chr [1:8818] "LAS" "LAS" "LAS" "LAS" ...
##   ..$ valid            : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ tmpf             : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ dwpf             : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ relh             : num [1:8818] 31.1 33.4 33.2 35.9 39.2 ...
##   ..$ drct             : num [1:8818] 100 10 10 340 210 230 230 0 0 210 ...
##   ..$ sknt             : num [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ p01i             : chr [1:8818] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8818] 30.1 30.1 30.1 30.1 30.1 ...
##   ..$ mslp             : num [1:8818] 1020 1020 1021 1022 1022 ...
##   ..$ vsby             : num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8818] "CLR" "CLR" "CLR" "CLR" ...
##   ..$ skyc2            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc3            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8818] NA NA NA NA ...
##   ..$ skyl1            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl2            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8818] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8818], format: NA NA ...
##   ..$ feel             : num [1:8818] 45.8 40.7 39.4 37.1 37.7 ...
##   ..$ metar            : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ dirW             : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ spdW             : num [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ gustW            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  13 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : chr [1:8818] "03" "05" "05" "05" ...
##   ..$ WindGust  : chr [1:8818] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: chr [1:8818] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8818] "08" "07" "06" "05" ...
##   ..$ DewC      : chr [1:8818] "M08" "M08" "M09" "M09" ...
##   ..$ Altimeter : chr [1:8818] "A3009" "A3011" "A3013" "A3014" ...
##   ..$ SLP       : chr [1:8818] "SLP196" "SLP204" "SLP210" "SLP215" ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  15 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : int [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 8 7 6 5 4 3 2 2 1 1 ...
##   ..$ DewC      : int [1:8818] -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##   ..$ Altimeter : int [1:8818] 3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##   ..$ SLP       : int [1:8818] 196 204 210 215 216 219 221 222 226 229 ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ TempF     : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ DewF      : num [1:8818] 18 17.1 16 16 17.1 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  16 variables:
##   ..$ METAR     : chr [1:8818] "56Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "56Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "56Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089" "56Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ WindDir   : chr [1:8818] "100" "010" "010" "340" ...
##   ..$ WindSpeed : int [1:8818] 3 5 5 5 3 3 3 0 0 3 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 8 7 6 5 4 3 2 2 1 1 ...
##   ..$ DewC      : int [1:8818] -8 -8 -9 -9 -8 -8 -9 -9 -8 -8 ...
##   ..$ Altimeter : int [1:8818] 3009 3011 3013 3014 3015 3016 3017 3017 3018 3019 ...
##   ..$ SLP       : int [1:8818] 196 204 210 215 216 219 221 222 226 229 ...
##   ..$ FahrC     : chr [1:8818] "T00831078" "T00671083" "T00611089" "T00501089" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##   ..$ origMETAR : chr [1:8818] "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##   ..$ TempF     : num [1:8818] 46.9 44.1 43 41 39.9 ...
##   ..$ DewF      : num [1:8818] 18 17.1 16 16 17.1 ...
##   ..$ modSLP    : num [1:8818] 1020 1020 1021 1022 1022 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..$ isVV  : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : logi [1:8818] NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ mtxCloud : chr [1:8818, 1:12] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8818 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowBKN: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...
##   ..$ minCloudLevel  : num [1:8818] 1e+06 1e+06 1e+06 1e+06 1e+06 ...

Example #25: More Generic Downloading the Data by Function

The download function can be more generic, with just a few key arguments passed to it. Those arguments can then be derived in a helper function, with the generic function called from inside that.

Example code includes:

genericGetASOSData <- function(fileLoc, 
                               stationID,
                               startDate, 
                               endDate,
                               downloadMethod="curl", 
                               baseURL="https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?", 
                               dataFields="all", 
                               dataTZ="Etc%2FUTC", 
                               dataFormat="onlycomma", 
                               dataLatLon="no", 
                               dataMissing="M", 
                               dataTrace="T", 
                               dataDirect="no", 
                               dataType=2
                               ) {
    
    # Get the year, day, and hour of the key dates
    y1 <- lubridate::year(startDate)
    m1 <- lubridate::month(startDate)
    d1 <- lubridate::day(startDate)
    
    y2 <- lubridate::year(endDate)
    m2 <- lubridate::month(endDate)
    d2 <- lubridate::day(endDate)

    # Mimic the string shown below
    # https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=LAS&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2
    
    useURL <- paste0(baseURL, "station=", stationID)  # add the desired station
    useURL <- paste0(useURL, "&data=", dataFields)  # default is "all
    useURL <- paste0(useURL, "&year1=", y1, "&month1=", m1, "&day1=", d1)  # Specify start ymd
    useURL <- paste0(useURL, "&year2=", y2, "&month2=", m2, "&day2=", d2)  # Specify end ymd
    useURL <- paste0(useURL, "&tz=", dataTZ)  # time zone (default UTC)
    useURL <- paste0(useURL, "&format=", dataFormat)  # file format (default CSV)
    useURL <- paste0(useURL, "&latlon=", dataLatLon)  # Whether to include lat-lon (default no)
    useURL <- paste0(useURL, "&missing=", dataMissing)  # How to handle missing data (default is 'M')
    useURL <- paste0(useURL, "&trace=", dataTrace)  # How to handle trace data (default is 'T')
    useURL <- paste0(useURL, "&direct=", dataDirect)  # Whether to directly get the data (default is 'no')
    useURL <- paste0(useURL, "&report_type=", dataType)  # Whether to get just METAR (2, default)
    
    # Download the file
    cat("\nDownloading from:", useURL, "\nDownloading to:", fileLoc, "\n")
    download.file(useURL, destfile=fileLoc, method=downloadMethod)
        
    return(TRUE)
}

Which can then be called by a function that checks whether the file already exists:

getASOSStationTime <- function(stationID, 
                               startDate=NULL, 
                               endDate=NULL, 
                               analysisYears=NULL, 
                               fileLoc=NULL,
                               ovrWrite=FALSE,
                               ...) {
    
    # Get the relevant time period for the data
    if (is.null(analysisYears) & (is.null(startDate) | is.null(endDate))) {
        stop("Must provide either analysisYears or both of startDate and endDate")
    }
    if (!is.null(startDate) & !is.null(endDate) & !is.null(analysisYears)) {
        stop("Should specify EITHER both of startDate and endDate OR analysisYears BUT NOT both")
    }
    if (is.null(startDate)) {
        startDate <- ISOdate(min(analysisYears)-1, 12, 31, hour=0)
        endDate <- ISOdate(max(analysisYears)+1, 1, 2, hour=0)
    }
    
    # Create the file name
    if (!is.null(analysisYears)) {
        if (length(analysisYears) == 1) { timeDesc <- analysisYears }
        else { timeDesc <- paste0(min(analysisYears), "-", max(analysisYears)) }
    } else {
        timeDesc <- paste0(lubridate::year(startDate), 
                           str_pad(lubridate::month(startDate), 2, pad="0"),
                           str_pad(lubridate::day(startDate), 2, pad="0"), 
                           "-", 
                           lubridate::year(endDate), 
                           str_pad(lubridate::month(endDate), 2, pad="0"), 
                           str_pad(lubridate::day(endDate), 2, pad="0")
                           )
    }
    
    if (is.null(fileLoc)) {
        fileLoc <- paste0("./RInputFiles/", "metar_k", str_to_lower(stationID), "_", timeDesc, ".txt")
    }
    
    cat("\nData for station", stationID, "from", as.character(startDate), "to", 
        as.character(endDate), "will download to", fileLoc, "\n"
        )
    
    if (file.exists(fileLoc) & !ovrWrite) {
        stop("File already exists, aborting")
    }
    
    genericGetASOSData(fileLoc=fileLoc, stationID=stationID, startDate=startDate, endDate=endDate, ...)
    
}

And the files can then be run (cached to avoid multiple hits to the Iowa State server):

# Get data for EWR for 2016
getASOSStationTime(stationID="EWR", analysisYears=2016)
## 
## Data for station EWR from 2015-12-31 to 2017-01-02 will download to ./RInputFiles/metar_kewr_2016.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=EWR&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kewr_2016.txt
## [1] TRUE
# Get data for ATL for 2016-2018
getASOSStationTime(stationID="ATL", analysisYears=2016:2018)
## 
## Data for station ATL from 2015-12-31 to 2019-01-02 will download to ./RInputFiles/metar_katl_2016-2018.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=ATL&data=all&year1=2015&month1=12&day1=31&year2=2019&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_katl_2016-2018.txt
## [1] TRUE
# Get data for DFW for 2016-03-31 to 2017-03-01
getASOSStationTime(stationID="DFW", 
                   startDate=ISOdate(2016, 03, 31, hour=0), 
                   endDate=ISOdate(2017, 3, 1, hour=0)
                   )
## 
## Data for station DFW from 2016-03-31 to 2017-03-01 will download to ./RInputFiles/metar_kdfw_20160331-20170301.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=DFW&data=all&year1=2016&month1=3&day1=31&year2=2017&month2=3&day2=1&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kdfw_20160331-20170301.txt
## [1] TRUE

Example #26: Comparative Wind Directions by Location

The processed data files can be compared, with wind directions being one of the comparison sets.

Example code includes:

# Extract the wind direction data from a processed METAR file
getWindDirGroup <- function(keyList, src) {
    
    # Use the fullMETAR data and extract WindDir, WindSpeed, month
    windPlotData <- get(keyList)[["fullMETAR"]] %>%
        select(WindDir, WindSpeed, month) %>%
        mutate(windDirGroup=factor(case_when(WindSpeed==0 ~ "No Wind", 
                                             WindDir=="VRB" ~ "Variable", 
                                             WindDir %in% c("030", "040", "050", "060") ~ "NE", 
                                             WindDir %in% c("070", "080", "090", "100", "110") ~ "E", 
                                             WindDir %in% c("120", "130", "140", "150") ~ "SE", 
                                             WindDir %in% c("160", "170", "180", "190", "200") ~ "S", 
                                             WindDir %in% c("210", "220", "230", "240") ~ "SW", 
                                             WindDir %in% c("250", "260", "270", "280", "290") ~ "W", 
                                             WindDir %in% c("300", "310", "320", "330") ~ "NW", 
                                             WindDir %in% c("340", "350", "360", "010", "020") ~ "N", 
                                            TRUE ~ "Error"
                                            ) , levels=c("No Wind", "Variable", "Error", 
                                                         "N", "NE", "E", "SE", "S", "SW", "W", "NW"
                                                         )
                                   )
               )
    
    # Rempve the errors and calculate percentages by month for the remainder
    processedWindData <- windPlotData %>%
        filter(windDirGroup != "Error") %>%
        group_by(month, windDirGroup) %>%
        summarize(n=n()) %>%
        ungroup() %>%
        group_by(month) %>%
        mutate(pct=n/sum(n)) %>%
        ungroup() %>%
        mutate(src=src)
    
    processedWindData

}


# Consolidate and plot wind data
consolidatePlotWind <- function(files, names) {

    consFun <- function(x, y) { getWindDirGroup(keyList=x, src=y) }
    boundByRows <- map2_dfr(.x=files, .y=names, .f=consFun)

    p <- boundByRows %>%
        ggplot(aes(x=month, y=pct, color=src)) + 
        geom_line(aes(group=src)) + 
        facet_wrap(~windDirGroup) + 
        labs(title="Wind Direction Frequency by Month", x="Month", y="Frequency of Wind Observations") +
        theme(axis.text.x=element_text(angle=90))
    print(p)
    
    boundByRows
}

# Run for 2016 with Lincoln, NE; Las Vegas, NV; and Chicago, IL
cpWind <- consolidatePlotWind(files=c("klnk2016METAR", "klas2016METAR", "kord2016METAR"), 
                              names=c("Lincoln, NE (2016)", "Las Vegas, NV (2016)", "Chicago, IL (2016)")
                              )

cpWind
## # A tibble: 359 x 5
##    month windDirGroup     n     pct src               
##    <fct> <fct>        <int>   <dbl> <chr>             
##  1 Jan   No Wind         76 0.0992  Lincoln, NE (2016)
##  2 Jan   Variable         3 0.00392 Lincoln, NE (2016)
##  3 Jan   N              179 0.234   Lincoln, NE (2016)
##  4 Jan   NE              31 0.0405  Lincoln, NE (2016)
##  5 Jan   E               30 0.0392  Lincoln, NE (2016)
##  6 Jan   SE              63 0.0822  Lincoln, NE (2016)
##  7 Jan   S              174 0.227   Lincoln, NE (2016)
##  8 Jan   SW              57 0.0744  Lincoln, NE (2016)
##  9 Jan   W               65 0.0849  Lincoln, NE (2016)
## 10 Jan   NW              88 0.115   Lincoln, NE (2016)
## # ... with 349 more rows

Example #27: Extracting Precipitation Information from METAR

METAR data include descriptions of the precipitation occuring at any given time. Two of the most common precipitation forms are rain (RA) and SN(). These can occur together, denoted as RASN or SNRA in the METAR.

Further, the precipitation type can be classified using a prefix as light (-), moderate (no prefix), or heavy (+). So, RA would be moderate rain, -SNRA would be a light snow-rain mix, +RA would be heavy rain.

Additionally, the timing of the precipitation event is captured in the remarks using B (begin) and E (end). So, an hourly METAR of RAB20E35B50 would mean rain started at 20 past the hour, ended at 35 past the hour, and began again at 50 past the hour. Since METAR are often taken just before the top of the hour, a four-digit time is used if it is in the ‘previous’ hour; for example, RAB1959E36 in the 2053Z METAR.

We can use the remarks to see when it was raining in the given METAR.

Example code includes:

# Extract the METAR and the date-time from a processed list
procMET <- klas2016METAR$fullMETAR %>% 
    select(origMETAR, dtime)

# Check whether there are comments for either rain regins (RAB) or rain ends (RAE) and pull all the data
procMET <- procMET %>% 
    mutate(isRain=grepl("RA[B|E]", origMETAR), 
           rainData=str_extract(origMETAR, pattern="(RA[B|E]\\d+[0-9BE]*)"), 
           nBegin=pmax(0, str_count(rainData, "B"), na.rm=TRUE), 
           nEnd=pmax(0, str_count(rainData, "E"), na.rm=TRUE), 
           dateUTC=lubridate::date(dtime), 
           hourUTC=lubridate::hour(dtime)
           )
str(procMET)
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR: chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ dtime    : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ isRain   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ rainData : chr  NA NA NA NA ...
##  $ nBegin   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC  : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC  : int  0 1 2 3 4 5 6 7 8 9 ...
# Check the counts of rain beginning and rain ending
procMET %>%
    count(isRain, nBegin, nEnd)
## # A tibble: 7 x 4
##   isRain nBegin  nEnd     n
##   <lgl>   <dbl> <dbl> <int>
## 1 FALSE       0     0  8643
## 2 TRUE        0     1    56
## 3 TRUE        1     0    51
## 4 TRUE        1     1    59
## 5 TRUE        1     2     2
## 6 TRUE        2     1     6
## 7 TRUE        2     2     1
# Extract all the times when rain began (can be multiple per METAR)
rainMET <- procMET %>%
    select(dateUTC, hourUTC, rainData, nBegin, nEnd)

# Confirm the rainMET is unique by dateUTC and hourUTC
dupAns <- rainMET %>%
    select(dateUTC, hourUTC) %>%
    duplicated() %>%
    any()
cat("\nAre there any problems with duplicated keys?", dupAns, "\n")
## 
## Are there any problems with duplicated keys? FALSE
# Extract the matrix of rain beginning data
rainBegin <- rainMET %>% 
    pull(rainData) %>% 
    str_extract_all("B\\d+", simplify=TRUE) %>%
    as.data.frame(stringsAsFactors=FALSE)

# Extract the matrix of rain ending data
rainEnd <- rainMET %>% 
    pull(rainData) %>% 
    str_extract_all("E\\d+", simplify=TRUE) %>%
    as.data.frame(stringsAsFactors=FALSE)

# Convert the rain begins data to the hour and minute associated to the UTC
extractTime <- function(x, var, sym="B") {
    if (is.na(x[var]) | x[var]=="") {
        utcUse <- NA
    }
    else {
        utcUse <- str_replace(x[var], sym, "")
        if (str_length(utcUse)==4) {
            utcUse <- paste0(x["dateUTC"], " ", utcUse)
        } else if (str_length(utcUse)==2) {
            utcUse <- paste0(x["dateUTC"], " ", x["hourChar"], utcUse)
        } else {
            cat("\nCannot parse data: ", x, "\n", x[var], "\n", var, sym, utcUse)
            stop()
        }
    }
}

# Extract the begin times from V1
beginTime1 <- rainBegin %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V1", sym="B")

# Extract the begin times from V2
beginTime2 <- rainBegin %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V2", sym="B")

# Extract the end times from V1
endTime1 <- rainEnd %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V1", sym="E")

# Extract the end times from V2
endTime2 <- rainEnd %>%
    cbind(rainMET[, c("dateUTC", "hourUTC")]) %>%
    mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
    apply(1, FUN=extractTime, var="V2", sym="E")

allBegins <- 
    c(beginTime1[!is.na(beginTime1)], beginTime2[!is.na(beginTime2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

allEnds <- 
    c(endTime1[!is.na(endTime1)], endTime2[!is.na(endTime2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

# Check the each ending is after its associated beginning
endMinusBeg <- allEnds - allBegins
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
# Rain is listed as ending at 2016-05-01 00:42 and 2016-04-30 17:38 both for start 2016-04-30 11:20
endMinusBeg <- allEnds[allEnds != lubridate::ymd_hm("2016-05-01 0042")] - allBegins

# Plot the rain durations in minutes
data.frame(minutesRain=as.numeric(endMinusBeg), month=lubridate::month(allBegins)) %>%
    ggplot(aes(x=minutesRain)) +
    geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Plot the rain totals (in hours) by month
data.frame(minutesRain=as.numeric(endMinusBeg), month=lubridate::month(allBegins)) %>%
    group_by(month) %>%
    summarize(minutesRain=sum(minutesRain), nRain=n()) %>%
    ggplot(aes(x=factor(month.abb[month], levels=month.abb[1:12]), y=minutesRain/60)) +
    geom_col() + 
    labs(title="Las Vegas, NV Rainfall (hours) in 2016", y="Hours of Rain", x="Month") + 
    geom_text(aes(y=2+minutesRain/60, label=round(minutesRain/60, 1)))

Example #28: Function for Extracting Precipitation Information from METAR

The precipitation extraction process can be converted to functions. In addition to being more modular, several additional features can be included:

  • Allow to search for other precipitation types, specifically snow (SN) or drizzle (DZ)
  • Correct for the isue that a time of 2016-01-04 2359Z is placed in the 206-01-05 bucket by the code above
  • Identify areas where the intervals are not sensible or raise significant questions (negative durations, durations longer than 24 hours)
  • Check that intervals overlap with the time periods in the METAR that show that precipitation event

Example code includes:

  • Extract the precipitation data from a processed METAR file
  • Find the begin and end times from this file
  • Check for intervals of questionable length and correct as needed
# Extract the precipitation data from a processed METAR file
extractPrecipData <- function(processedFile, pType="RA") {

    # Extract the METAR and the date-time from a processed list
    procMET <- processedFile[["fullMETAR"]] %>% 
        select(origMETAR, dtime)

    # Check whether there are comments for the desired pType either beginning or ending
    keyPattern <- paste0("(", pType, "[B|E]\\d+[0-9BE]*)")
    cat("\nRegex search code is:", keyPattern, "\n\n")
    
    procMET <- procMET %>% 
        mutate(precipData=str_extract(origMETAR, pattern=keyPattern), 
               isPrecip=grepl(paste0(pType, "[B|E]"), origMETAR, perl=TRUE), 
               nBegin=pmax(0, str_count(precipData, "B"), na.rm=TRUE), 
               nEnd=pmax(0, str_count(precipData, "E"), na.rm=TRUE), 
               dateUTC=lubridate::date(dtime), 
               hourUTC=lubridate::hour(dtime)
               )
    str(procMET)
    cat("\n")

    # Check the counts of precipitation beginning and rain ending
    procMET %>%
        count(isPrecip, nBegin, nEnd) %>%
        print()

    # Check that the file is unique by time
    dupAns <- procMET %>%
        select(dateUTC, hourUTC) %>%
        duplicated %>%
        any()

    cat("\nAre there any problems with duplicated keys?", dupAns, "\n")
    
    procMET
}

testFileProc <-extractPrecipData(klas2016METAR, pType="RA")
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8643
## 2 TRUE          0     1    56
## 3 TRUE          1     0    51
## 4 TRUE          1     1    59
## 5 TRUE          1     2     2
## 6 TRUE          2     1     6
## 7 TRUE          2     2     1
## 
## Are there any problems with duplicated keys? FALSE

Next, a function for extrating beginning and ending times can be written:

# Helper function to extract the beginning and ending times using str_extract_all
getBeginEndTimeMatrix <- function(file, pullVar="precipData", pState="B") {
    file %>%
        pull(pullVar) %>%
        str_extract_all(paste0(pState, "\\d+"), simplify=TRUE) %>%
        as.data.frame(stringsAsFactors=FALSE)    
}

testBegin <- getBeginEndTimeMatrix(testFileProc, pState="B")
testEnd <- getBeginEndTimeMatrix(testFileProc, pState="E")

if (ncol(testBegin) != 2 | ncol(testEnd) != 2) { 
    stop("Hard-coded for 2 columns each of begin/end- Fix") 
}

# Helper function to convert begin and end time using date and hour
extractTime <- function(x, var, sym="B") {
    if (is.na(x[var]) | x[var]=="") {
        utcReturn <- NA
    }
    else {
        utcUse <- str_replace(x[var], sym, "")
        if (str_length(utcUse)==4) {
            utcReturn <- paste0(x["dateUTC"], " ", utcUse)
            # If a 4-digit time starts with 23 and is in the 0Z METAR, it is part of the previous day
            if(str_sub(utcUse, 1, 2)=="23" & as.numeric(x["hourChar"])==0) {
                utcReturn <- paste0(as.Date(x["dateUTC"]) - 1, " ", utcUse)
            }
        } else if (str_length(utcUse)==2) {
            utcReturn <- paste0(x["dateUTC"], " ", x["hourChar"], utcUse)
        } else {
            cat("\nCannot parse data: ", x, "\n", x[var], "\n", var, sym, utcUse)
            stop()
        }
    }
    utcReturn
}

getBeginEndTimeVector <- function(timeExtractFile, origFullFile, extractVar, extractSym) {
    timeExtractFile %>%
        cbind(origFullFile[, c("dateUTC", "hourUTC")]) %>%
        mutate(hourChar=str_pad(str_trim(as.character(hourUTC)), width=2, pad="0")) %>%
        apply(1, FUN=extractTime, var=extractVar, sym=extractSym)
}

testBT1 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V1", extractSym="B")
testBT2 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V2", extractSym="B")
testET1 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V1", extractSym="E")
testET2 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V2", extractSym="E")

testAllBegins <- 
    c(testBT1[!is.na(testBT1)], testBT2[!is.na(testBT2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

testAllEnds <- 
    c(testET1[!is.na(testET1)], testET2[!is.na(testET2)]) %>%
    lubridate::ymd_hm() %>%
    sort()

Next, create the time intervals vector, with the capability to change the start state, check the intervals, exclude times as needed, and compare to raw METAR.

Example code includes:

# Function to create the time intervals data
createPrecipInterval <- function(endVector, beginVector, endExclude=c(), beginExclude=c(), 
                                 sState=FALSE, nMinPrint=1, maxProb=1000, nMaxPrint=1
                                 ) {
    
    # If the starting state is one of precipitation, allow it to 'burn in' by deleting the first end time
    if(sState) { endVector <- endVector[2:length(endVector)] }
    
    # Create the interval data
    endsUse <- endVector[!(endVector %in% lubridate::ymd_hm(endExclude))]
    beginsUse <- beginVector[!(beginVector %in% lubridate::ymd_hm(beginExclude))]
    intervalData <- endsUse - beginsUse
    
    # Show a summary of the interval data
    print(summary(as.numeric(intervalData)))
    
    # If there are any non-positive intervals, print the data causing the first of them
    if (min(as.numeric(intervalData)) <= 0) {
        cat("\nProblem Detected - Intervals are not positive.  Data to help investigate\n")
        posns <- which(as.numeric(intervalData) <= 0)
        posns <- max(1, posns[1]-5):min(length(beginsUse), length(endsUse), posns[nMinPrint]+5)
        cat("\nVector of Begins\n")
        print(lubridate::as_datetime(beginsUse[posns]))
        cat("\nVector of Ends\n")
        print(lubridate::as_datetime(endsUse[posns]))
        cat("\n")
    }
    
    # If there are any very long positive intervals, print the data causing the first five of them
    if (max(as.numeric(intervalData)) >= maxProb) {
        cat("\nPotential problem Detected - very long.  Data to help investigate\n")
        posns <- which(as.numeric(intervalData) >= maxProb)
        cat("\nPositions with problems are:", posns)
        posns <- max(1, posns[1]-5):min(length(beginsUse), length(endsUse), posns[min(length(posns), nMaxPrint)]+5)
        cat("\nVector of Begins\n")
        print(lubridate::as_datetime(beginsUse[posns]))
        cat("\nVector of Ends\n")
        print(lubridate::as_datetime(endsUse[posns]))
        cat("\n")
    }
    
    # Return the interval data
    intervalData
}

# Run a full pass for Las Vegas Rain
testIntervals <- createPrecipInterval(testAllEnds, testAllBegins)
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -41221.0   -454.5    -16.0   1382.7     30.0 520731.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-04-28 21:33:00 UTC" "2016-04-29 00:34:00 UTC"
##  [3] "2016-04-29 03:27:00 UTC" "2016-04-30 08:51:00 UTC"
##  [5] "2016-04-30 11:20:00 UTC" "2016-05-01 04:16:00 UTC"
##  [7] "2016-05-01 04:55:00 UTC" "2016-05-06 14:35:00 UTC"
##  [9] "2016-05-06 19:20:00 UTC" "2016-05-07 07:24:00 UTC"
## [11] "2016-05-07 22:49:00 UTC"
## 
## Vector of Ends
##  [1] "2016-04-28 21:42:00 UTC" "2016-04-29 00:58:00 UTC"
##  [3] "2016-04-29 03:51:00 UTC" "2016-04-30 09:53:00 UTC"
##  [5] "2016-04-30 17:38:00 UTC" "2016-05-01 00:42:00 UTC"
##  [7] "2016-05-01 04:31:00 UTC" "2016-05-01 05:10:00 UTC"
##  [9] "2016-05-06 15:12:00 UTC" "2016-05-06 19:30:00 UTC"
## [11] "2016-05-07 07:45:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 127
## Vector of Begins
## [1] "2016-12-25 00:16:00 UTC" "2016-12-30 22:09:00 UTC"
## [3] "2016-12-30 23:44:00 UTC" "2016-12-31 03:59:00 UTC"
## [5] "2016-12-31 05:45:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-24 14:06:00 UTC" "2016-12-25 00:32:00 UTC"
## [3] "2016-12-30 22:35:00 UTC" "2016-12-31 00:04:00 UTC"
## [5] "2016-12-31 04:12:00 UTC"
testIntervals <- createPrecipInterval(testAllEnds, testAllBegins, endExclude=c("2016-05-01 0042"))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   16.00   29.00   52.15   57.75  502.00
createPrecipIntervalPlots <- function(intervalData, beginsData, titleText, yAxisText, 
                                      beginExclude=c(), returnPlotsAndData=FALSE
                                      ) {

    # Exclude any data from begins as needed
    beginsData <- beginsData[!(beginsData %in% lubridate::ymd_hm(beginExclude))]
    
    # Create a plotting data frame
    histFrame <- data.frame(minutesPrecip=as.numeric(intervalData), 
                            month=lubridate::month(beginsData), 
                            rainDate=lubridate::date(beginsData)
                            ) %>%
        mutate(hoursPrecip=minutesPrecip/60)
    
    # Plot the precipitation durations in hours
    p1 <- histFrame %>%
        ggplot(aes(x=hoursPrecip)) +
        geom_histogram() + 
        labs(title=titleText, x=yAxisText, 
             subtitle="Distribution of hours per unique precipitation event"
             )
    print(p1)

    # Plot the precipitation by day in hours
    p2 <- histFrame %>%
        group_by(rainDate) %>%
        summarize(hoursPrecip=sum(hoursPrecip)) %>%
        ggplot(aes(x=hoursPrecip)) +
        geom_histogram() + 
        labs(title=titleText, x=yAxisText, 
             subtitle="Distribution of hours per day of precipitation (on days when 1+ minutes occurred)"
             )
    print(p2)
    
    # Plot the rain totals (in hours) by month
    # Create a data frame of all months and merge in precipitation data as an where available (0 otherwise)
    monthFrame <- histFrame %>%
        group_by(month) %>%
        summarize(minutesPrecip=sum(minutesPrecip), hoursPrecip=sum(hoursPrecip), nPrecip=n()) %>%
        right_join(data.frame(month=1:12, monthName=month.abb[1:12]), by="month") %>%
        tidyr::replace_na(list(minutesPrecip=0, hoursPrecip=0, nPrecip=0))
    # print(monthFrame)
    
    p3 <- monthFrame %>%
        ggplot(aes(x=factor(monthName, levels=month.abb[1:12]), y=hoursPrecip)) +
        geom_col() + 
        labs(title=titleText, y=yAxisText, x="") + 
        geom_text(aes(y=2 + hoursPrecip, label=round(hoursPrecip, 1)))
    print(p3)
    
    if (returnPlotsAndData) {
        list(histFrame=histFrame, monthFrame=monthFrame, p1=p1, p2=p2, p3=p3)
    } else {
        NULL
    }
}

# Plots for Las Vegas Rain
createPrecipIntervalPlots(testIntervals, testAllBegins, 
                          titleText="Las Vegas, NV Rainfall (hours) in 2016", yAxisText="Hours of Rain"
                          )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## NULL

Example #29: Combining Functions and Extending to Other Locales and Precipitation Types

The functions can be combined, and the approach then extended to other locales and precipitation types.

Example code includes:

# Combining all the functions in one place
runFullPrecipExtraction <- function(df, 
                                    pType, 
                                    titleText, 
                                    yAxisText,
                                    endExclude=c(), 
                                    beginExclude=c(), 
                                    endAdd=c(),
                                    beginAdd=c(),
                                    maxProb=1000, 
                                    sState=FALSE, 
                                    makePlots=TRUE, 
                                    returnPlotsAndData=FALSE
                                    ) {
    
    # Extract the precipitation data from a specified processed METAR file
    testFileProc <-extractPrecipData(df, pType=pType)
    
    # Confirm that the two-column specification is met (should relax hard-coding on this)
    testBegin <- getBeginEndTimeMatrix(testFileProc, pState="B")
    testEnd <- getBeginEndTimeMatrix(testFileProc, pState="E")

    if (!(ncol(testBegin) %in% c(1, 2, 3)) | !(ncol(testEnd) %in% c(1, 2, 3))) { 
        cat("\nBegin columns:", ncol(testBegin), "\t\tEndcolumns:", ncol(testEnd))
        stop("Hard-coded for 1-3 columns each of begin/end- Fix")
    }
    
    # Extract the beginning and ending information
    testBT1 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V1", extractSym="B")
    testET1 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V1", extractSym="E")
    
    if (ncol(testBegin) >= 2) {
        testBT2 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V2", extractSym="B")
    } else {
        testBT2 <- c()
    }
    if (ncol(testEnd) >= 2) {
        testET2 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V2", extractSym="E")
    } else {
        testET2 <- c()
    }

    if (ncol(testBegin) >= 3) {
        testBT3 <- getBeginEndTimeVector(testBegin, testFileProc, extractVar="V3", extractSym="B")
    } else {
        testBT3 <- c()
    }
    if (ncol(testEnd) >= 3) {
        testET3 <- getBeginEndTimeVector(testEnd, testFileProc, extractVar="V3", extractSym="E")
    } else {
        testET3 <- c()
    }
    
    testAllBegins <- 
        c(testBT1[!is.na(testBT1)], testBT2[!is.na(testBT2)], testBT3[!is.na(testBT3)], beginAdd) %>%
        lubridate::ymd_hm() %>%
        sort()

    testAllEnds <- 
        c(testET1[!is.na(testET1)], testET2[!is.na(testET2)], testET3[!is.na(testET3)], endAdd) %>%
        lubridate::ymd_hm() %>%
        sort()
    
    # Create the intervals
    testIntervals <- createPrecipInterval(testAllEnds, testAllBegins, 
                                          endExclude=endExclude, beginExclude=beginExclude, 
                                          maxProb=maxProb, sState=sState
                                          )
    
    # Create the precipitation plots
    plotOut <- NULL
    if (makePlots) {
        plotOut <- createPrecipIntervalPlots(testIntervals, testAllBegins, titleText=titleText, 
                                             yAxisText=yAxisText, beginExclude=beginExclude, 
                                             returnPlotsAndData=returnPlotsAndData
                                             )
    }
    
    if (!returnPlotsAndData) { plotOut <- NULL }
    
    # Return all of the key files, along with the parameters used
    keyParams <- list(fileName=deparse(substitute(df)), pType=pType, 
                      endExclude=endExclude, beginExclude=beginExclude, 
                      endAdd=endAdd, beginAdd=beginAdd,
                      maxProb=maxProb, sState=sState
                      )
    list(keyParams=keyParams, 
         testFileProc=testFileProc, 
         testAllBegins=testAllBegins[!(testAllBegins %in% lubridate::ymd_hm(beginExclude))], 
         testAllEnds=testAllEnds[!(testAllEnds %in% lubridate::ymd_hm(endExclude))], 
         testIntervals=testIntervals,
         plotOut=plotOut
         )
}

The full function can then be run to replicate the Las Vegas, NV 2016 rain plots:

# Run for Las Vegas, NV 2016 rainfall
klasRain2016 <- runFullPrecipExtraction(klas2016METAR, 
                                        pType="RA", 
                                        titleText="Las Vegas, NV Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-05-01 0042"),
                                        beginExclude=c(), 
                                        maxProb=1000, 
                                        sState=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KLAS 310056Z 10003KT 10SM CLR 08/M08 A3009 RMK AO2 SLP196 T00831078" "KLAS 310156Z 01005KT 10SM CLR 07/M08 A3011 RMK AO2 SLP204 T00671083" "KLAS 310256Z 01005KT 10SM CLR 06/M09 A3013 RMK AO2 SLP210 T00611089 53011" "KLAS 310356Z 34005KT 10SM CLR 05/M09 A3014 RMK AO2 SLP215 T00501089" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:56:00" "2015-12-31 01:56:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8643
## 2 TRUE          0     1    56
## 3 TRUE          1     0    51
## 4 TRUE          1     1    59
## 5 TRUE          1     2     2
## 6 TRUE          2     1     6
## 7 TRUE          2     2     1
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   16.00   29.00   52.15   57.75  502.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Confirm that results are the same as when run outside the function
identical(testIntervals, klasRain2016$testIntervals)
## [1] TRUE
identical(testFileProc, klasRain2016$testFileProc)
## [1] TRUE
identical(testAllBegins, klasRain2016$testAllBegins)
## [1] TRUE
identical(testAllEnds, klasRain2016$testAllEnds)
## [1] FALSE
# Show the key parameters used
klasRain2016$keyParams
## $fileName
## [1] "klas2016METAR"
## 
## $pType
## [1] "RA"
## 
## $endExclude
## [1] "2016-05-01 0042"
## 
## $beginExclude
## NULL
## 
## $endAdd
## NULL
## 
## $beginAdd
## NULL
## 
## $maxProb
## [1] 1000
## 
## $sState
## [1] FALSE

The full functions can then be explored for Chicago, IL 2016 rain plots:

# Run for Chicago, IL 2016 rainfall - run with no plots while debugging begin/end exclude
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c(),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -31587.0  -2464.5   -355.0    703.3   -121.0 509393.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-02-02 23:56:00 UTC" "2016-02-03 18:13:00 UTC"
##  [3] "2016-02-03 22:40:00 UTC" "2016-02-29 01:17:00 UTC"
##  [5] "2016-03-01 02:48:00 UTC" "2016-03-01 04:36:00 UTC"
##  [7] "2016-03-01 09:44:00 UTC" "2016-03-01 11:01:00 UTC"
##  [9] "2016-03-07 16:43:00 UTC" "2016-03-08 10:46:00 UTC"
## [11] "2016-03-09 05:56:00 UTC"
## 
## Vector of Ends
##  [1] "2016-02-03 01:25:00 UTC" "2016-02-03 18:43:00 UTC"
##  [3] "2016-02-03 23:00:00 UTC" "2016-02-29 02:39:00 UTC"
##  [5] "2016-03-01 03:11:00 UTC" "2016-03-01 04:14:00 UTC"
##  [7] "2016-03-01 05:30:00 UTC" "2016-03-01 10:17:00 UTC"
##  [9] "2016-03-01 11:23:00 UTC" "2016-03-07 17:49:00 UTC"
## [11] "2016-03-08 10:55:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 402 403
## Vector of Begins
## [1] "2016-12-25 22:11:00 UTC" "2016-12-26 04:56:00 UTC"
## [3] "2016-12-26 08:23:00 UTC" "2016-12-26 08:57:00 UTC"
## [5] "2016-12-26 12:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-23 23:32:00 UTC" "2016-12-24 03:43:00 UTC"
## [3] "2016-12-25 23:40:00 UTC" "2016-12-26 05:20:00 UTC"
## [5] "2016-12-26 08:37:00 UTC"
# Exclude the first problem end time - 2016-03-01 04:14:00 UTC
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414"),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -30696.0   -253.0    -42.0    515.3      5.0 509428.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-03-24 03:38:00 UTC" "2016-03-24 05:49:00 UTC"
##  [3] "2016-03-24 07:38:00 UTC" "2016-03-24 11:32:00 UTC"
##  [5] "2016-03-24 14:41:00 UTC" "2016-03-27 19:59:00 UTC"
##  [7] "2016-03-30 13:42:00 UTC" "2016-03-30 16:34:00 UTC"
##  [9] "2016-03-30 21:13:00 UTC" "2016-03-30 23:27:00 UTC"
## [11] "2016-03-31 01:09:00 UTC"
## 
## Vector of Ends
##  [1] "2016-03-24 05:09:00 UTC" "2016-03-24 05:51:00 UTC"
##  [3] "2016-03-24 08:20:00 UTC" "2016-03-24 14:01:00 UTC"
##  [5] "2016-03-24 18:41:00 UTC" "2016-03-24 22:14:00 UTC"
##  [7] "2016-03-28 00:49:00 UTC" "2016-03-30 13:50:00 UTC"
##  [9] "2016-03-30 16:59:00 UTC" "2016-03-30 21:25:00 UTC"
## [11] "2016-03-31 00:20:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 402
## Vector of Begins
## [1] "2016-12-25 22:11:00 UTC" "2016-12-26 04:56:00 UTC"
## [3] "2016-12-26 08:23:00 UTC" "2016-12-26 08:57:00 UTC"
## [5] "2016-12-26 12:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-24 03:43:00 UTC" "2016-12-25 23:40:00 UTC"
## [3] "2016-12-26 05:20:00 UTC" "2016-12-26 08:37:00 UTC"
## [5] "2016-12-26 12:23:00 UTC"
# Exclude the second problem end time - 2016-03-24 22:14:00 UTC
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414", "2016-03-24 2214"),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0    15.0    39.0   239.5   119.0 10708.0 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 202 204 206 207 211 213 214 215 224 227 236 240 244
## Vector of Begins
##  [1] "2016-07-01 03:25:00 UTC" "2016-07-06 07:42:00 UTC"
##  [3] "2016-07-07 22:34:00 UTC" "2016-07-13 13:51:00 UTC"
##  [5] "2016-07-13 21:47:00 UTC" "2016-07-13 23:47:00 UTC"
##  [7] "2016-07-15 19:16:00 UTC" "2016-07-15 21:06:00 UTC"
##  [9] "2016-07-17 13:44:00 UTC" "2016-07-18 04:11:00 UTC"
## [11] "2016-07-22 01:46:00 UTC"
## 
## Vector of Ends
##  [1] "2016-07-01 03:38:00 UTC" "2016-07-06 14:08:00 UTC"
##  [3] "2016-07-08 00:36:00 UTC" "2016-07-13 22:07:00 UTC"
##  [5] "2016-07-14 00:54:00 UTC" "2016-07-15 20:39:00 UTC"
##  [7] "2016-07-15 22:27:00 UTC" "2016-07-17 14:38:00 UTC"
##  [9] "2016-07-18 04:44:00 UTC" "2016-07-22 06:34:00 UTC"
## [11] "2016-07-24 01:00:00 UTC"
# Exclude the begin time that creates a 3-day continuous rainfall - 2016-07-13 2347
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414", "2016-03-24 2214"),
                                        beginExclude=c("2016-07-13 2347"), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -30696.0    -44.0     13.0    915.4     54.0 509428.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-08-25 09:29:00 UTC" "2016-08-27 04:07:00 UTC"
##  [3] "2016-08-27 06:24:00 UTC" "2016-08-27 10:40:00 UTC"
##  [5] "2016-08-27 14:06:00 UTC" "2016-08-29 20:59:00 UTC"
##  [7] "2016-08-29 23:15:00 UTC" "2016-08-30 10:23:00 UTC"
##  [9] "2016-09-01 20:08:00 UTC" "2016-09-06 19:54:00 UTC"
## [11] "2016-09-07 17:13:00 UTC"
## 
## Vector of Ends
##  [1] "2016-08-25 09:39:00 UTC" "2016-08-27 05:07:00 UTC"
##  [3] "2016-08-27 10:21:00 UTC" "2016-08-27 13:20:00 UTC"
##  [5] "2016-08-27 14:48:00 UTC" "2016-08-29 20:48:00 UTC"
##  [7] "2016-08-29 21:16:00 UTC" "2016-08-30 00:38:00 UTC"
##  [9] "2016-08-30 13:56:00 UTC" "2016-09-01 20:18:00 UTC"
## [11] "2016-09-06 20:11:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 401
## Vector of Begins
## [1] "2016-12-25 22:11:00 UTC" "2016-12-26 04:56:00 UTC"
## [3] "2016-12-26 08:23:00 UTC" "2016-12-26 08:57:00 UTC"
## [5] "2016-12-26 12:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-24 03:43:00 UTC" "2016-12-25 23:40:00 UTC"
## [3] "2016-12-26 05:20:00 UTC" "2016-12-26 08:37:00 UTC"
## [5] "2016-12-26 12:23:00 UTC"
# Exclude the third problem end time - 2016-08-29 20:48:00 UTC
# As this is the final data integrity issue, show the plots
kordRain2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-03-01 0414", "2016-03-24 2214", 
                                                     "2016-08-29 2048"
                                                     ),
                                        beginExclude=c("2016-07-13 2347"), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8256
## 2 TRUE          0     1   179
## 3 TRUE          1     0   177
## 4 TRUE          1     1   168
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   14.00   33.50   71.33   86.00 1243.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Show the key parameters used
kordRain2016$keyParams
## $fileName
## [1] "kord2016METAR"
## 
## $pType
## [1] "RA"
## 
## $endExclude
## [1] "2016-03-01 0414" "2016-03-24 2214" "2016-08-29 2048"
## 
## $beginExclude
## [1] "2016-07-13 2347"
## 
## $endAdd
## NULL
## 
## $beginAdd
## NULL
## 
## $maxProb
## [1] 1440
## 
## $sState
## [1] FALSE

The full functions can then be explored for Chicago, IL 2016 snow plots:

# Run for Chicago, IL 2016 snowfall - run with no plots while debugging begin/end exclude
kordSnow2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="SN", 
                                        titleText="Chicago, IL Snowfall (hours) in 2016", 
                                        yAxisText="Hours of Snow", 
                                        endExclude=c(),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=FALSE
                                        )
## 
## Regex search code is: (SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8630
## 2 TRUE          0     1    69
## 3 TRUE          1     0    62
## 4 TRUE          1     1    44
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -348118   -2604     -97    7747      33  520190 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-02-09 00:53:00 UTC" "2016-02-09 02:48:00 UTC"
##  [3] "2016-02-09 18:08:00 UTC" "2016-02-09 18:55:00 UTC"
##  [5] "2016-02-09 23:25:00 UTC" "2016-02-10 02:36:00 UTC"
##  [7] "2016-02-10 04:19:00 UTC" "2016-02-12 13:32:00 UTC"
##  [9] "2016-02-12 14:28:00 UTC" "2016-02-12 17:15:00 UTC"
## [11] "2016-02-14 17:06:00 UTC"
## 
## Vector of Ends
##  [1] "2016-02-09 01:46:00 UTC" "2016-02-09 17:19:00 UTC"
##  [3] "2016-02-09 18:41:00 UTC" "2016-02-09 22:30:00 UTC"
##  [5] "2016-02-09 23:33:00 UTC" "2016-02-10 02:16:00 UTC"
##  [7] "2016-02-10 03:38:00 UTC" "2016-02-10 04:31:00 UTC"
##  [9] "2016-02-12 13:45:00 UTC" "2016-02-12 16:00:00 UTC"
## [11] "2016-02-12 17:45:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 123 124 125 126 127
## Vector of Begins
## [1] "2016-12-23 20:18:00 UTC" "2016-12-23 23:01:00 UTC"
## [3] "2016-12-24 03:43:00 UTC" "2016-12-29 18:33:00 UTC"
## [5] "2016-12-30 02:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-17 09:46:00 UTC" "2016-12-17 20:09:00 UTC"
## [3] "2016-12-18 11:07:00 UTC" "2016-12-23 17:50:00 UTC"
## [5] "2016-12-23 20:34:00 UTC"
# After investigation, there are five extraneous end times in the Chicago snowfall data - exclude them
# Create the plots
kordSnow2016 <- runFullPrecipExtraction(kord2016METAR, 
                                        pType="SN", 
                                        titleText="Chicago, IL Snowfall (hours) in 2016", 
                                        yAxisText="Hours of Snow", 
                                        endExclude=c("2016-02-10 0216", "2016-03-24 2331", 
                                                     "2016-04-08 2300", "2016-04-09 0446",
                                                     "2016-12-24 0309"
                                                     ),
                                        beginExclude=c(), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8630
## 2 TRUE          0     1    69
## 3 TRUE          1     0    62
## 4 TRUE          1     1    44
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   20.25   47.00  136.29  152.25 1016.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Show the key parameters used
kordSnow2016$keyParams
## $fileName
## [1] "kord2016METAR"
## 
## $pType
## [1] "SN"
## 
## $endExclude
## [1] "2016-02-10 0216" "2016-03-24 2331" "2016-04-08 2300" "2016-04-09 0446"
## [5] "2016-12-24 0309"
## 
## $beginExclude
## NULL
## 
## $endAdd
## NULL
## 
## $beginAdd
## NULL
## 
## $maxProb
## [1] 1440
## 
## $sState
## [1] FALSE

Example #30: Checking Intervals for Consistency

The intervals created above can be compared against the hourly METAR observations for consistency. In general, RAE53 would lead to no precipitation recorded at 53Z while RAB53 would lead to precipitation recorded at 53Z. So, the intervals created should be reduced by 1 for purposes of overlap analysis.

Example code includes:

library(lubridate)  # lubridate has the %within% function and creates and checks intervals
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
intervalConsistency <- function(lst, pType){

    # Extract the beginning and interval times
    begins <- lst[["testAllBegins"]]
    ends <- lst[["testAllEnds"]]
    durs <- lst[["testIntervals"]]

    # Create intervals from the raw list file
    precipInts <- interval(begins, begins + durs - 1)

    
    # Extract the METAR and date-time information
    metar <- lst[["testFileProc"]][["origMETAR"]]
    dtime <- lst[["testFileProc"]][["dtime"]]
    
    # Take each METAR observation and check two factors
    # Is the precipitation type recorded in that METAR?
    # Does that METAR fall in any of the intervals?
    precipMETAR <- grepl(paste0(pType, ".*RMK"), metar, perl=TRUE)
    intMETAR <- sapply(dtime, FUN=function(x) {x %within% precipInts %>% any()})

    # Check for the consistency of the observations and print the mismatches
    print(table(precipMETAR, intMETAR))

    mism <- which(precipMETAR != intMETAR)
    if (length(mism) == 0) {
        cat("\nFull matches between METAR observations and intervals\n")
    } else {
        for (x in mism) {
            cat("\nMismatch at time", strftime(dtime[x], format="%Y-%m-%d %H:%M", tz="UTC"), "UTC\n")
            print(metar[max(1, x-2):min(length(metar), x+2)])
        }
    }
    
    list(precipInts=precipInts, mismatches=mism, mismatchTimes=dtime[mism])
}

# Mismatch due to TSRA without RAB on 2016-04-30
tmp <- intervalConsistency(klasRain2016, pType="RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8710    0
##       TRUE      1  107
## 
## Mismatch at time 2016-04-30 23:56 UTC
## [1] "KLAS 302156Z 01012G18KT 10SM FEW045 BKN060 BKN110 18/07 A2973 RMK AO2 SLP055 T01830072 $"                                                                                           
## [2] "KLAS 302256Z 04009KT 10SM FEW050 BKN090 OVC110 18/07 A2972 RMK AO2 SLP054 SH DSNT SE-S T01780067 $"                                                                                 
## [3] "KLAS 302356Z 04026G31KT 6SM TSRA SCT035 BKN080CB OVC100 16/10 A2978 RMK AO2 PK WND 04031/2356 PRESRR SLP075 OCNL LTGICCG N TS N-NE MOV SW P0000 60000 T01610100 10189 20106 53013 $"
## [4] "KLAS 010056Z 17004KT 10SM FEW026 BKN055 BKN100 14/11 A2976 RMK AO2 PK WND 04033/2357 SLP070 TSE19 MOV SW RAE42 P0037 T01390106 $"                                                   
## [5] "KLAS 010156Z 27007KT 10SM FEW050 BKN095 BKN220 14/09 A2975 RMK AO2 SLP068 T01390094 $"
# Mismatches due to malformatted METAR with missing data on 2016-07-13
# Mismatch due to RA vs FZRA on 2016-02-29
# Mismatch due to -RASN without RAB on 2016-03-24
# Mismatch due to TSRA without RAB on 2016-08-29
tmp <- intervalConsistency(kordRain2016, pType="RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8342    4
##       TRUE      3  466
## 
## Mismatch at time 2016-03-01 03:51 UTC
## [1] "KORD 010151Z 06012KT 10SM -DZ OVC070 00/M06 A3002 RMK AO2 SLP172 P0000 T00001061"                                                                  
## [2] "KORD 010251Z 05011KT 10SM -RA BKN050 BKN070 OVC090 M01/M05 A3004 RMK AO2 DZE48RAB48 SLP178 60000 P0000 T10061050 51017"                            
## [3] "KORD 010351Z 06014G21KT 10SM -FZRA FEW017 SCT045 OVC055 M02/M05 A3003 RMK AO2 RAE11FZRAB39SNB11E39 SLP175 P0000 I1000 T10171050"                   
## [4] "KORD 010451Z 06015KT 10SM -FZRA BKN014 OVC019 M02/M05 A3001 RMK AO2 SLP169 FZRAE14B36 P0003 I1000 T10221050"                                       
## [5] "KORD 010551Z 07013KT 10SM BKN014 OVC070 M02/M05 A3000 RMK AO2 FZRAE30PLB09E48 SLP166 P0000 60000 I1000 I6000 T10221050 10006 21022 401111022 56011"
## 
## Mismatch at time 2016-03-24 21:51 UTC
## [1] "KORD 241951Z 01019KT 10SM BKN010 OVC015 03/02 A2948 RMK AO2 SLP987 T00330017 $"                                         
## [2] "KORD 242051Z 34014G22KT 10SM OVC010 03/01 A2953 RMK AO2 PRESRR SLP004 T00280011 53009 $"                                
## [3] "KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00 A2960 RMK AO2 SLP029 T00110000 $"                             
## [4] "KORD 242251Z 35011G19KT 5SM -SN BR SCT007 BKN019 OVC046 01/00 A2962 RMK AO2 RAE14 SLP037 T00110000 $"                   
## [5] "KORD 242351Z 34016G23KT 10SM FEW011 BKN018 OVC025 01/M02 A2968 RMK AO2 SLP055 SNE31 60012 T00061017 10039 20006 53025 $"
## 
## Mismatch at time 2016-07-13 18:51 UTC
## [1] "KORD 131251Z 00000KT 10SM FEW055 FEW075 BKN250 25/21 A2990 RMK AO2 SLP118 T02500211"               
## [2] "KORD 131351Z 18009KT 9SM -RA FEW070 BKN090 BKN250 26/22 A2991 RMK AO2 RAB51 SLP124 P0000 T02560217"
## [3] "KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A2984 RMK AO2 SLP099 T02780217"            
## [4] "KORD 131951Z 23008KT 10SM BKN030 OVC070 29/22 A2981 RMK AO2 SLP089 T02890222"                      
## [5] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"         
## 
## Mismatch at time 2016-07-13 19:51 UTC
## [1] "KORD 131351Z 18009KT 9SM -RA FEW070 BKN090 BKN250 26/22 A2991 RMK AO2 RAB51 SLP124 P0000 T02560217"        
## [2] "KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A2984 RMK AO2 SLP099 T02780217"                    
## [3] "KORD 131951Z 23008KT 10SM BKN030 OVC070 29/22 A2981 RMK AO2 SLP089 T02890222"                              
## [4] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"                 
## [5] "KORD 132151Z 20010KT 10SM -RA FEW060 SCT080 BKN210 OVC250 28/23 A2979 RMK AO2 RAB47 SLP081 P0000 T02830233"
## 
## Mismatch at time 2016-07-13 20:51 UTC
## [1] "KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A2984 RMK AO2 SLP099 T02780217"                    
## [2] "KORD 131951Z 23008KT 10SM BKN030 OVC070 29/22 A2981 RMK AO2 SLP089 T02890222"                              
## [3] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"                 
## [4] "KORD 132151Z 20010KT 10SM -RA FEW060 SCT080 BKN210 OVC250 28/23 A2979 RMK AO2 RAB47 SLP081 P0000 T02830233"
## [5] "KORD 132251Z 22008G17KT 10SM FEW060 FEW085 SCT210 OVC250 28/22 A2977 RMK AO2 RAE07 SLP077 P0000 T02780222" 
## 
## Mismatch at time 2016-07-13 22:51 UTC
## [1] "KORD 132051Z 21009KT 10SM SCT025 BKN033 BKN040 28/22 A2980 RMK AO2 SLP087 T02830222 56022"                                                                                 
## [2] "KORD 132151Z 20010KT 10SM -RA FEW060 SCT080 BKN210 OVC250 28/23 A2979 RMK AO2 RAB47 SLP081 P0000 T02830233"                                                                
## [3] "KORD 132251Z 22008G17KT 10SM FEW060 FEW085 SCT210 OVC250 28/22 A2977 RMK AO2 RAE07 SLP077 P0000 T02780222"                                                                 
## [4] "KORD 132351Z 26010KT 10SM -TSRA FEW055 SCT085CB BKN160 OVC180 27/22 A2978 RMK AO2 RAB47 TSB35 SLP077 OCNL LTGIC VC W TS VC W MOV E P0000 60000 T02720222 10289 20272 55009"
## [5] "KORD 140051Z 16017G26KT 10SM -TSRA FEW043 SCT090CB BKN110 BKN160 22/21 A2972 RMK AO2 PK WND 13030/0035 SLP059 FRQ LTGIC VC E-SE TS VC E-SE MOV E P0004 T02170206"          
## 
## Mismatch at time 2016-08-29 19:51 UTC
## [1] "KORD 291751Z 09006KT 10SM SCT026 SCT048 BKN170 BKN250 29/21 A3023 RMK AO2 SLP233 T02940211 10294 20222 58003"                                                 
## [2] "KORD 291851Z 06006KT 10SM FEW025 BKN048 BKN170 BKN250 30/21 A3021 RMK AO2 SLP226 OCNL LTGIC DSNT SW CB DSNT SW T03000211 $"                                   
## [3] "KORD 291951Z 25009KT 1SM R10L/1200VP6000FT +TSRA BKN041CB BKN055 OVC110 25/21 A3022 RMK AO2 SLP230 TSB11 CONS LTGICCG OHD TS OHD MOV E P0001 T02500211 $"     
## [4] "KORD 292051Z 01005KT 9SM TS SCT055CB BKN110 OVC140 24/22 A3019 RMK AO2 RAE48 SLP217 OCNL LTGICCG VC NW-W-NE TS VC NW-N-NE MOV E P0039 60040 T02390222 58010 $"
## [5] "KORD 292151Z 02005KT 10SM FEW050 FEW075 SCT140 BKN180 OVC250 25/22 A3018 RMK AO2 RAB2059E16 TSE06 SLP215 P0000 T02500222 $"
# Mismatches due to SN without SNB on 2016-02-09, 2016-03-24, 2016-04-08, 2016-12-23
tmp <- intervalConsistency(kordSnow2016, pType="SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8532    0
##       TRUE      7  276
## 
## Mismatch at time 2016-02-10 00:51 UTC
## [1] "KORD 092251Z 32012KT 10SM SCT032 OVC060 M08/M14 A2990 RMK AO2 SLP136 SNE30 P0000 T10831139"                            
## [2] "KORD 092351Z 31014KT 10SM SCT028 BKN036 OVC070 M09/M14 A2992 RMK AO2 SNB25E33 SLP144 60000 T10891144 11078 21089 53023"
## [3] "KORD 100051Z 31012KT 4SM -SN FEW024 OVC040 M09/M13 A2993 RMK AO2 SLP148 P0003 T10941133"                               
## [4] "KORD 100151Z 29011KT 5SM -SN OVC040 M09/M13 A2994 RMK AO2 SLP150 P0002 T10941133"                                      
## [5] "KORD 100251Z 29011KT 3SM -SN SCT021 OVC031 M09/M12 A2994 RMK AO2 SNE16B36 SLP150 60000 T10941122 51006"                
## 
## Mismatch at time 2016-02-10 01:51 UTC
## [1] "KORD 092351Z 31014KT 10SM SCT028 BKN036 OVC070 M09/M14 A2992 RMK AO2 SNB25E33 SLP144 60000 T10891144 11078 21089 53023"
## [2] "KORD 100051Z 31012KT 4SM -SN FEW024 OVC040 M09/M13 A2993 RMK AO2 SLP148 P0003 T10941133"                               
## [3] "KORD 100151Z 29011KT 5SM -SN OVC040 M09/M13 A2994 RMK AO2 SLP150 P0002 T10941133"                                      
## [4] "KORD 100251Z 29011KT 3SM -SN SCT021 OVC031 M09/M12 A2994 RMK AO2 SNE16B36 SLP150 60000 T10941122 51006"                
## [5] "KORD 100351Z 31012KT 10SM BKN042 OVC085 M10/M14 A2994 RMK AO2 SNE38 SLP149 P0002 T11001139"                            
## 
## Mismatch at time 2016-03-24 21:51 UTC
## [1] "KORD 241951Z 01019KT 10SM BKN010 OVC015 03/02 A2948 RMK AO2 SLP987 T00330017 $"                                         
## [2] "KORD 242051Z 34014G22KT 10SM OVC010 03/01 A2953 RMK AO2 PRESRR SLP004 T00280011 53009 $"                                
## [3] "KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00 A2960 RMK AO2 SLP029 T00110000 $"                             
## [4] "KORD 242251Z 35011G19KT 5SM -SN BR SCT007 BKN019 OVC046 01/00 A2962 RMK AO2 RAE14 SLP037 T00110000 $"                   
## [5] "KORD 242351Z 34016G23KT 10SM FEW011 BKN018 OVC025 01/M02 A2968 RMK AO2 SLP055 SNE31 60012 T00061017 10039 20006 53025 $"
## 
## Mismatch at time 2016-03-24 22:51 UTC
## [1] "KORD 242051Z 34014G22KT 10SM OVC010 03/01 A2953 RMK AO2 PRESRR SLP004 T00280011 53009 $"                                
## [2] "KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00 A2960 RMK AO2 SLP029 T00110000 $"                             
## [3] "KORD 242251Z 35011G19KT 5SM -SN BR SCT007 BKN019 OVC046 01/00 A2962 RMK AO2 RAE14 SLP037 T00110000 $"                   
## [4] "KORD 242351Z 34016G23KT 10SM FEW011 BKN018 OVC025 01/M02 A2968 RMK AO2 SLP055 SNE31 60012 T00061017 10039 20006 53025 $"
## [5] "KORD 250051Z 35016G21KT 10SM SCT017 OVC025 00/M02 A2971 RMK AO2 PK WND 36029/0031 SLP066 T00001022 $"                   
## 
## Mismatch at time 2016-04-08 22:51 UTC
## [1] "KORD 082051Z 29018G24KT 10SM FEW046 SCT055 OVC090 04/M03 A2981 RMK AO2 PK WND 30035/2025 SNE03B28E40 SLP098 P0000 60000 T00441033 50002"
## [2] "KORD 082151Z 32016G25KT 9SM BKN034 BKN046 OVC055 03/M03 A2982 RMK AO2 PK WND 32030/2057 SNB29E46 SLP103 P0000 T00281033"                
## [3] "KORD 082251Z 31014G31KT 9SM -SN SCT045 BKN050 OVC080 02/M06 A2983 RMK AO2 PK WND 32031/2243 SLP108 P0000 T00221061"                     
## [4] "KORD 082351Z 31014G25KT 10SM SCT060 SCT080 03/M09 A2984 RMK AO2 SNE00 SLP112 P0000 60000 T00281089 10050 20017 52012"                   
## [5] "KORD 090051Z 35012G21KT 10SM FEW060 SCT080 01/M07 A2987 RMK AO2 PK WND 32030/2355 SLP121 T00111072"                                     
## 
## Mismatch at time 2016-12-24 01:51 UTC
## [1] "KORD 232351Z 14008KT 6SM BR SCT037 OVC055 01/M01 A2996 RMK AO2 RAE01B27E32SNB01E48 SLP152 4/003 P0003 60006 T00111006 10017 20011 56024"
## [2] "KORD 240051Z 20007KT 6SM BR FEW013 OVC055 01/M01 A2997 RMK AO2 SLP153 T00111006"                                                        
## [3] "KORD 240151Z 17011KT 4SM -SN BR SCT015 OVC039 01/M01 A2993 RMK AO2 SLP143 P0000 T00111006"                                              
## [4] "KORD 240251Z 20007KT 2SM -SN BR OVC008 01/00 A2994 RMK AO2 SLP143 P0000 60000 T00110000 58008"                                          
## [5] "KORD 240351Z 21008KT 2 1/2SM -SN BR OVC006 01/00 A2992 RMK AO2 RAB09E43SNE09B43 SLP138 P0004 T00060000"                                 
## 
## Mismatch at time 2016-12-24 02:51 UTC
## [1] "KORD 240051Z 20007KT 6SM BR FEW013 OVC055 01/M01 A2997 RMK AO2 SLP153 T00111006"                       
## [2] "KORD 240151Z 17011KT 4SM -SN BR SCT015 OVC039 01/M01 A2993 RMK AO2 SLP143 P0000 T00111006"             
## [3] "KORD 240251Z 20007KT 2SM -SN BR OVC008 01/00 A2994 RMK AO2 SLP143 P0000 60000 T00110000 58008"         
## [4] "KORD 240351Z 21008KT 2 1/2SM -SN BR OVC006 01/00 A2992 RMK AO2 RAB09E43SNE09B43 SLP138 P0004 T00060000"
## [5] "KORD 240451Z 23007KT 2SM -DZ BR OVC006 01/00 A2992 RMK AO2 DZB20SNE10 SLP139 P0000 T00110000"

Example #31: All Precipitation Types

The METAR data can be explored to find all the precipitation types it contains. Broadly, a precipitation type should meet five criteria:

  • Occurs prior to RMK (the remarks section has letter-only data that is not precipitation)
  • Preceded by a space
  • Optionally, begins with + (heavy) or - (light)
  • Contains only capital letters (no numbers) after the optional +/-
  • Trailed by a space

This will extract some non-precipitation types such as CLR and AUTO, so some iteration is expected.

Example code includes:

# Function to find precipitation types
findPrecipTypes <- function(lst, precipRegex="(?<= )[+-]?[A-Z]+(?= )", priorTo="RMK", exclTypes=NULL) {

    metar <- lst[["testFileProc"]][["origMETAR"]]
    
    # Keep only everything prior to priorTo
    if (!is.null(priorTo)) {
        rmkLoc <- str_locate(metar, priorTo)
        rmkLoc[is.na(rmkLoc)] <- -1  # keep everything if NA
        metar <- str_sub(metar, start=1, end=pmax(-1, rmkLoc[, 1]-1))
    }
    
    # Exclude any items from the METAR
    if (!is.null(exclTypes)) {
        for (excl in exclTypes) {
            metar <- str_replace(metar, excl, "")
        }
    }
    
    # Is there a possible precipitation type in the file?
    pExists <- grepl(precipRegex, metar, perl=TRUE)
    
    cat("\nPrecipitation data status by METAR record\n\n")
    print(table(pExists))
    cat("\n")
    
    # Find the precipitation matches
    precipMatches <- str_match_all(metar, pattern=precipRegex)
    
    # Confirm that there are a maximum of two precipitation types per METAR
    listLengths <- sapply(precipMatches, FUN=length)
    if (max(listLengths) > 4) {
        cat("\nMaximum combinations observed is:", max(listLengths), "\n")
        stop("Hard-coded for at most 4 precipitation matches, please investigate")
    } else if (max(listLengths)==0) {
        cat("\nNo precipitation detected in this file\n")
    }
    
    if (max(listLengths) %in% c(2, 3, 4)) {
        cat("\nMultiple Precipitation types in the same record include\n")
        sapply(precipMatches[listLengths %in% c(2, 3, 4)], FUN=paste, collapse=" ") %>% 
            table() %>% 
            sort(decreasing=TRUE) %>%
            print()
    }
    
    if (sum(listLengths==1) > 0) {
        cat("\nSingle Precipitation types in the same record include\n")
        sapply(precipMatches[listLengths==1], FUN=c) %>% 
            table() %>% 
            sort(decreasing=TRUE) %>%
            print()
    }    

    # Extract the second column and summarize precipitation types
    # table(precipMatches[, 2]) %>% sort(decreasing=TRUE) %>% print()
    
    precipMatches
}

# Original pass for Chicago, IL
x1 <- findPrecipTypes(kordRain2016)
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  6792  2023 
## 
## 
## Multiple Precipitation types in the same record include
## .
##     -RA BR     -SN BR     -DZ BR      RA BR    TSRA BR   +TSRA BR     +RA BR 
##        151        108         47         34         11         10          7 
##      SN FG     BR CLR    SN FZFG -FZDZSN BR   -TSRA BR     -DZ FG   -FZDZ BR 
##          7          6          6          4          3          2          2 
##   -RASN BR     -PL BR   -RAPL BR   -SNPL BR     +SN FG   +SN FZFG    BR BCFG 
##          2          1          1          1          1          1          1 
##      DZ BR     HZ CLR    MIFG BR      TS BR   VCTS -RA    VCTS BR 
##          1          1          1          1          1          1 
## 
## Single Precipitation types in the same record include
## .
##     CLR      BR     -RA     -SN   -TSRA      FG      HZ     -DZ      TS   +TSRA 
##     805     327     210     150      26      25      24      13       9       3 
##      RA    TSRA    VCTS   -FZRA    AUTO    FZFG -FZRAPL   -RASN   -SNPL      SN 
##       3       3       3       2       2       2       1       1       1       1
# Exclude AUTO and CLR as they are a record state and cloud type, not precipitation
x2 <- findPrecipTypes(kordRain2016, exclTypes=c("CLR", "AUTO"))
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  7599  1216 
## 
## 
## Multiple Precipitation types in the same record include
## .
##     -RA BR     -SN BR     -DZ BR      RA BR    TSRA BR   +TSRA BR     +RA BR 
##        151        108         47         34         11         10          7 
##      SN FG    SN FZFG -FZDZSN BR   -TSRA BR     -DZ FG   -FZDZ BR   -RASN BR 
##          7          6          4          3          2          2          2 
##     -PL BR   -RAPL BR   -SNPL BR     +SN FG   +SN FZFG    BR BCFG      DZ BR 
##          1          1          1          1          1          1          1 
##    MIFG BR      TS BR   VCTS -RA    VCTS BR 
##          1          1          1          1 
## 
## Single Precipitation types in the same record include
## .
##      BR     -RA     -SN   -TSRA      FG      HZ     -DZ      TS   +TSRA      RA 
##     333     210     150      26      25      25      13       9       3       3 
##    TSRA    VCTS   -FZRA    FZFG -FZRAPL   -RASN   -SNPL      SN 
##       3       3       2       2       1       1       1       1
# Original pass for Las Vegas, NV
x3 <- findPrecipTypes(klasRain2016)
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  6205  2613 
## 
## 
## Multiple Precipitation types in the same record include
## .
##   AUTO CLR VCBLDU CLR     -RA BR      RA BR   -TSRA BR   -RA VCFG     +RA BR 
##          7          7          5          3          2          1          1 
##      FU HZ    HZ BLDU    VCTS RA 
##          1          1          1 
## 
## Single Precipitation types in the same record include
## .
##    CLR    -RA VCBLDU  -TSRA     TS     HZ   VCFG     BR   AUTO     RA   VCTS 
##   2393     72     54     17     13      8      8      4      3      3      3 
##     DU   TSRA  +TSRA   BLDU 
##      2      2      1      1
# Exclude AUTO and CLR as they are a record state and cloud type, not precipitation
x4 <- findPrecipTypes(klasRain2016, exclTypes=c("CLR", "AUTO"))
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  8608   210 
## 
## 
## Multiple Precipitation types in the same record include
## .
##   -RA BR    RA BR -TSRA BR -RA VCFG   +RA BR    FU HZ  HZ BLDU  VCTS RA 
##        5        3        2        1        1        1        1        1 
## 
## Single Precipitation types in the same record include
## .
##    -RA VCBLDU  -TSRA     TS     HZ   VCFG     BR     RA   VCTS     DU   TSRA 
##     72     61     17     13      8      8      4      3      3      2      2 
##  +TSRA   BLDU 
##      1      1

Example #32: Extracting Precipitation Amounts

The METAR also contains summary of precipitation amounts that have occurred in the past hour(s). Broadly, the format is:

  • Pdddd - the amount of liquid precipitation equivalent that has fallen in the past hour
  • 6dddd - the amount of liquid precipitation equivalent that has fallen in the past 3/6 hours (for 0Z, 6Z, 12Z, and 18Z it is 6 hours; and for 3Z, 9Z, 15Z, and 21Z it is 3 hours)
  • 7dddd - the amount of liquid precipitation equivalent that has fallen in the past 24 hours

The data as such can be extracted from the METAR.

Example code includes:

# Helper function to extract and convert data
extractConvertPrecipData <- function(var, regPattern) {
    
    mtxPrecip <- str_match(var, pattern=regPattern)
    vecPrecip <- mtxPrecip[, 2]
    vecPrecip[is.na(vecPrecip)] <- "0000"
    vecPrecip <- as.integer(vecPrecip) / 100
    
    vecPrecip
}

# Function to extract key precipitation information
extractLiquidPrecipAmounts <- function(lst) {
    
    # Pull the metar and the dtime
    metar <- lst[["testFileProc"]][["origMETAR"]]
    dtime <- lst[["testFileProc"]][["dtime"]]
    
    # Extract the Pdddd amounts (only available if liquid precipitation equivalent non-zero)
    pAmounts1Hour <- extractConvertPrecipData(metar, regPattern="RMK.* P(\\d{4})")
    cat("\nHourly totals for liquid precipitation equivalents:\n\n")
    table(pAmounts1Hour) %>% print()
    
    # Extract the 6dddd amounts
    pAmounts6Hour <- extractConvertPrecipData(metar, regPattern="RMK.* 6(\\d{4})")
    cat("\n3/6-hourly totals for liquid precipitation equivalents:\n\n")
    table(pAmounts6Hour) %>% print()
    
    # Extract the 7dddd amounts
    pAmounts24Hour <- extractConvertPrecipData(metar, regPattern="RMK.* 7(\\d{4})")
    cat("\n3/24-hourly totals for liquid precipitation equivalents:\n\n")
    table(pAmounts24Hour) %>% print()
    
    # Create a tibble and add the Zulu time
    tbl <- tibble::tibble(metar=metar, 
                          dtime=dtime, 
                          p1Hour=pAmounts1Hour, 
                          p3or6Hour=pAmounts6Hour, 
                          p24Hour=pAmounts24Hour
                          ) %>%
        mutate(zTime=(lubridate::hour(dtime) + ifelse(lubridate::minute(dtime)==0, 0, 1)) %% 24, 
               p6Hour=ifelse((zTime %% 6)==0, p3or6Hour, 0), 
               p3Hour=ifelse((zTime %% 3)==0, p3or6Hour-p6Hour, 0)
               )
    
    # Summarize the key amounts by Zulu time
    tbl %>%
        group_by(zTime) %>%
        summarize_if(is.numeric, sum) %>%
        as.data.frame() %>%
        print()
    
    tbl
    
}

klasPrecip <- extractLiquidPrecipAmounts(klasRain2016)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.13 0.15 0.16 0.29 
## 8721   28   14    8   14    7    6    3    3    2    2    3    2    1    1    1 
## 0.31 0.37 
##    1    1 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.08 0.09  0.1 0.11 0.13 0.14 0.15 0.16 0.19 
## 8739   15   13   10    6    3    3    2    4    1    3    5    1    1    1    1 
## 0.21 0.22 0.33 0.35 0.37 0.39 0.43 0.47 0.56 
##    1    1    1    1    2    1    1    1    1 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.08 0.09  0.1 0.19 0.22 0.24 0.25 0.54 0.61 0.84 
## 8790    5    3    4    1    2    1    2    1    1    2    1    1    1    1    1 
## 0.93 
##    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   0.19      1.76    0.00   1.76   0.00
## 2      1   0.67      0.00    0.00   0.00   0.00
## 3      2   0.02      0.00    0.00   0.00   0.00
## 4      3   0.07      0.76    0.00   0.00   0.76
## 5      4   0.13      0.00    0.00   0.00   0.00
## 6      5   0.11      0.00    0.00   0.00   0.00
## 7      6   0.19      1.19    0.00   1.19   0.00
## 8      7   0.09      0.00    0.00   0.00   0.00
## 9      8   0.12      0.00    0.00   0.00   0.00
## 10     9   0.14      0.35    0.00   0.00   0.35
## 11    10   0.11      0.00    0.00   0.00   0.00
## 12    11   0.18      0.00    0.00   0.00   0.00
## 13    12   0.17      0.81    4.77   0.81   0.00
## 14    13   0.29      0.00    0.00   0.00   0.00
## 15    14   0.23      0.00    0.00   0.00   0.00
## 16    15   0.21      0.73    0.00   0.00   0.73
## 17    16   0.12      0.00    0.00   0.00   0.00
## 18    17   0.09      0.00    0.00   0.00   0.00
## 19    18   0.07      1.01    0.00   1.01   0.00
## 20    19   0.01      0.00    0.00   0.00   0.00
## 21    20   0.35      0.00    0.00   0.00   0.00
## 22    21   0.25      0.61    0.00   0.00   0.61
## 23    22   0.57      0.00    0.00   0.00   0.00
## 24    23   0.39      0.00    0.00   0.00   0.00
kordPrecip <- extractLiquidPrecipAmounts(kordRain2016)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8243  150   89   66   57   37   23   22   14   13   11    8   11    6    6    8 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.24 0.25 0.26 0.27 0.28 0.29  0.3 0.31 0.32 
##    4    3    2    2    6    3    1    3    1    1    1    2    3    1    1    1 
## 0.33 0.34 0.37 0.39 0.41 0.45 0.49 0.52 0.57 0.59 0.73 0.74  0.8 1.16 
##    1    1    1    2    1    1    1    1    1    2    1    1    1    1 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8424   73   46   24   17   23   12   12   12   11   15    5    8   11    5    5 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.23 0.24 0.25 0.26 0.27 0.28 0.29  0.3 0.31 0.32 
##    9    4    8    4    2    3    4    5    2    5    1    1    3    7    3    2 
## 0.34 0.36 0.37 0.39  0.4 0.41 0.43 0.45 0.46 0.48 0.49  0.5 0.56 0.57 0.58 0.63 
##    1    1    2    3    4    3    1    2    2    3    4    2    1    1    2    2 
## 0.66 0.73 0.74 0.75 0.76 0.77 0.79  0.8 0.84 0.93 1.03 1.07 1.24 
##    1    1    1    1    1    1    1    2    1    1    1    1    2 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8687    8   14    6    3    4    5    5    3    2    3    1    3    3    2    3 
## 0.16 0.17 0.18  0.2 0.21 0.24 0.25 0.27 0.28  0.3 0.31 0.32 0.33 0.34 0.36 0.38 
##    2    2    4    1    3    2    1    1    2    2    2    3    1    1    1    1 
##  0.4 0.43 0.44 0.47 0.49  0.5 0.55 0.57 0.63 0.66 0.67 0.68 0.75 0.76 0.77 0.78 
##    1    1    2    1    2    3    1    1    1    1    1    1    1    1    1    1 
## 0.79 0.82 0.88 0.91 0.93 0.94 0.95 0.96 1.04 1.05 1.12  1.3 1.31 
##    1    1    1    1    1    1    1    2    1    1    1    1    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   2.83     11.21    0.00  11.21   0.00
## 2      1   1.99      0.00    0.00   0.00   0.00
## 3      2   2.42      0.00    0.00   0.00   0.00
## 4      3   2.44      6.95    0.00   0.00   6.95
## 5      4   2.12      0.00    0.00   0.00   0.00
## 6      5   1.51      0.00    0.00   0.00   0.00
## 7      6   0.87     10.52    0.00  10.52   0.00
## 8      7   1.47      0.00    0.00   0.00   0.00
## 9      8   1.54      0.00    0.00   0.00   0.00
## 10     9   1.60      4.64    0.00   0.00   4.64
## 11    10   0.83      0.00    0.00   0.00   0.00
## 12    11   1.31      0.00    0.00   0.00   0.00
## 13    12   1.93      8.03   36.62   8.03   0.00
## 14    13   1.09      0.00    0.00   0.00   0.00
## 15    14   1.42      0.00    0.00   0.00   0.00
## 16    15   0.60      2.87    0.00   0.00   2.87
## 17    16   1.07      0.00    0.00   0.00   0.00
## 18    17   1.24      0.00    0.00   0.00   0.00
## 19    18   1.46      7.16    0.00   7.16   0.00
## 20    19   0.95      0.00    0.00   0.00   0.00
## 21    20   1.29      0.00    0.00   0.00   0.00
## 22    21   2.71      5.37    0.00   0.00   5.37
## 23    22   1.30      0.00    0.00   0.00   0.00
## 24    23   1.40      0.00    0.00   0.00   0.00

Precipitation summarize can then be plotted by various time intervals:

plotPrecipHistogram <- function(df, var, xlab, title, mod=1, rem=0) {

    # Create a variable for whether the modulo matches the desired remainder
    df <- df %>%
        mutate(isMod=(df$zTime %% mod)==rem)
    
    # Separate in to records for further proceesing and records to discard
    dfUse <- df %>%
        filter(isMod)
    dfDiscard <- df %>%
        filter(!isMod)
    
    # Summarize the discarded records
    cat(nrow(dfDiscard), "records have been discarded due to not matching the modulo rules")
    cat("\nThese discarded rows have", sum(dfDiscard %>% pull(var)), "inches of precipitation\n")
    
    numZero <- sum(dfUse[, var]==0)
    numTotal <- nrow(dfUse)
    pZero <- round(numZero/numTotal, 3)

    p <- dfUse %>%
        filter_at(vars(var), any_vars(. > 0)) %>%
        ggplot(aes_string(x=var)) + 
        geom_histogram() + 
        labs(x=xlab, y="Frequency", title=title, 
             subtitle=paste0("Includes only the ", 100*(1-pZero), "% of non-zero observations (", 
                             numZero, " of ", numTotal, " observations are zero)"
                             )
         )
    
    print(p)

}

plotPrecipHistogram(klasPrecip, 
                    var="p1Hour", 
                    xlab="1-hour precipitation (inches)", 
                    title="Las Vegas, NV (2016) Hourly METAR"
                    )
## 0 records have been discarded due to not matching the modulo rules
## These discarded rows have 0 inches of precipitation
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(var)` instead of `var` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

plotPrecipHistogram(klasPrecip, 
                    var="p6Hour", 
                    xlab="6-hour precipitation (inches)", 
                    title="Las Vegas, NV (2016) Hourly METAR", 
                    mod=6
                    )
## 7349 records have been discarded due to not matching the modulo rules
## These discarded rows have 0 inches of precipitation
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

plotPrecipHistogram(klasPrecip, 
                    var="p24Hour", 
                    xlab="24-hour precipitation (inches)", 
                    title="Las Vegas, NV (2016) Hourly METAR", 
                    mod=24, 
                    rem=12
                    )
## 8450 records have been discarded due to not matching the modulo rules
## These discarded rows have 0 inches of precipitation
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Example #33: Validating Consistency of Precipitation Amounts

The precipitation amounts per day summed from the 1-hour, 6-hour, and 24-hour columns should be nearly identical give or take rounding errors.

Example code includes:

checkPrecipConsistency <- function(df, title, subT="", hour24=12, maxDelta=0.02,
                                   sumVars=c("p24Hour", "p6Hour", "p1Hour"), 
                                   yearsUse=NULL
                                   ) {

    # Create the data file for analysis - updated time, summarized by month and day
    dfPrecip <- df %>% 
        mutate(dtUse=dtime-lubridate::hours(hour24), 
               year=lubridate::year(dtUse), 
               month=lubridate::month(dtUse), 
               day=lubridate::day(dtUse), 
               n=1
               ) %>% 
        group_by(year, month, day) %>% 
        summarize_if(is.numeric, sum) %>%
        ungroup()
    
    head(dfPrecip) %>% 
        select_at(vars(all_of(c("year", "month", "day", "n", sumVars)))) %>%
        print()
    
    
    # Plot the monthly totals
    p1Data <- dfPrecip %>%
        mutate(ym=paste0(year, "-", str_pad(month, width=2, side="left", pad="0"))) %>%
        select_at(vars(all_of(c("ym", sumVars)))) %>%
        group_by(ym) %>%
        summarize_all(sum)
    
    # Filter to only the desired years if specified
    if (!is.null(yearsUse)) {
        p1Data <- p1Data %>%
            filter(as.integer(str_sub(ym, 1, 4)) %in% yearsUse)
    }
    
    print(p1Data)
    print(p1Data %>% select(-ym) %>% colSums())
    
    p1 <- p1Data %>%
        pivot_longer(-ym, names_to="timePeriod", values_to="inchesPrecip") %>%
        ggplot(aes(x=factor(ym), y=inchesPrecip, group=timePeriod, color=timePeriod)) + 
            geom_line(lwd=1.5) + 
        labs(x="Month", y="Inches of Liquid Precipitation", title=title, subtitle=subT) + 
        ylim(0, NA)
    print(p1)
    
    # Output any days that have differences of more than maxDelta inches
    mismPrecip <- dfPrecip %>%
        select_at(vars(c("month", "day", all_of(sumVars)))) 
    
    mismMinMax <- mismPrecip %>%
        select_at(vars(all_of(sumVars))) %>%
        apply(1, FUN=function(x) { c(max(x), min(x), diff(range(x))) }) %>%
        t() %>%
        as.data.frame()
    names(mismMinMax) <- c("maxPrecip", "minPrecip", "delta")
    
    mismPrecip <- bind_cols(mismPrecip, mismMinMax)

    cat("\nMismatch precipitation amounts by day are:\n")
    round(mismPrecip$delta, 2) %>% table() %>% print()
    
    cat("\n\nMismatch days of worse than maxDelta inches include\n")
    mismPrecip %>%
        filter(delta > maxDelta) %>%
        as.data.frame() %>%
        print()
    
}

checkPrecipConsistency(klasPrecip, title="Las Vegas, NV 2016 Precipitation by Month", yearsUse=2016)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2015    12    30    12    0      0      0   
## 2  2015    12    31    24    0      0      0   
## 3  2016     1     1    24    0      0      0   
## 4  2016     1     2    24    0      0      0   
## 5  2016     1     3    24    0      0      0   
## 6  2016     1     4    24    0.01   0.01   0.01
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2016-01    0.55   0.55   0.55
##  2 2016-02    0      0      0   
##  3 2016-03    0      0      0   
##  4 2016-04    2.26   2.26   2.26
##  5 2016-05    0.04   0.04   0.04
##  6 2016-06    0.63   0.63   0.63
##  7 2016-07    0.06   0.06   0.06
##  8 2016-08    0.17   0.17   0.17
##  9 2016-09    0      0      0   
## 10 2016-10    0.23   0.23   0.23
## 11 2016-11    0      0      0   
## 12 2016-12    0.83   0.83   0.83
## p24Hour  p6Hour  p1Hour 
##    4.77    4.77    4.77

## 
## Mismatch precipitation amounts by day are:
## .
##   0 
## 369 
## 
## 
## Mismatch days of worse than maxDelta inches include
## [1] month     day       p24Hour   p6Hour    p1Hour    maxPrecip minPrecip
## [8] delta    
## <0 rows> (or 0-length row.names)
checkPrecipConsistency(kordPrecip, title="Chicago, IL 2016 Precipitation by Month", yearsUse=2016)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2015    12    30    12    0.09   0      0   
## 2  2015    12    31    24    0.02   0      0.02
## 3  2016     1     1    24    0.06   0.06   0.06
## 4  2016     1     2    24    0      0      0   
## 5  2016     1     3    24    0      0      0   
## 6  2016     1     4    24    0      0      0   
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2016-01    1.22   1.39   2.28
##  2 2016-02    1.18   1.27   2.17
##  3 2016-03    3.55   3.73   3.04
##  4 2016-04    2.82   2.82   2.79
##  5 2016-05    5.67   5.67   5.67
##  6 2016-06    2.59   2.59   2.59
##  7 2016-07    6.23   6.21   5.67
##  8 2016-08    4.26   4.26   4.26
##  9 2016-09    1.76   1.76   1.76
## 10 2016-10    3.77   3.77   3.77
## 11 2016-11    1.69   1.69   1.61
## 12 2016-12    1.77   1.76   1.76
## p24Hour  p6Hour  p1Hour 
##   36.51   36.92   37.37

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.02 0.03 0.04 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.14 0.15 0.16 0.19 
##  329    3    4    1    2    1    4    3    2    1    1    1    1    2    1    1 
## 0.24 0.26 0.27 0.31 0.33 0.38 0.41 0.53 0.54 0.68 
##    2    1    1    1    2    1    1    1    1    1 
## 
## 
## Mismatch days of worse than maxDelta inches include
##    month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1     12  30    0.09   0.00   0.00      0.09      0.00  0.09
## 2      1   7    0.02   0.03   0.06      0.06      0.02  0.04
## 3      1   9    0.44   0.28   0.28      0.44      0.28  0.16
## 4      1  10    0.00   0.00   0.08      0.08      0.00  0.08
## 5      1  11    0.12   0.44   0.53      0.53      0.12  0.41
## 6      1  12    0.00   0.00   0.07      0.07      0.00  0.07
## 7      1  22    0.02   0.02   0.29      0.29      0.02  0.27
## 8      1  23    0.00   0.00   0.08      0.08      0.00  0.08
## 9      1  25    0.15   0.15   0.30      0.30      0.15  0.15
## 10     1  30    0.00   0.00   0.12      0.12      0.00  0.12
## 11     2   2    0.79   0.79   0.46      0.79      0.46  0.33
## 12     2   3    0.02   0.02   0.09      0.09      0.02  0.07
## 13     2   4    0.00   0.00   0.04      0.04      0.00  0.04
## 14     2   8    0.04   0.04   0.23      0.23      0.04  0.19
## 15     2   9    0.00   0.00   0.09      0.09      0.00  0.09
## 16     2  12    0.07   0.00   0.00      0.07      0.00  0.07
## 17     2  14    0.10   0.11   0.20      0.20      0.10  0.10
## 18     2  15    0.00   0.00   0.06      0.06      0.00  0.06
## 19     2  16    0.04   0.04   0.57      0.57      0.04  0.53
## 20     2  24    0.00   0.15   0.07      0.15      0.00  0.15
## 21     2  28    0.08   0.08   0.32      0.32      0.08  0.24
## 22     3   1    0.06   0.06   0.17      0.17      0.06  0.11
## 23     3   4    0.07   0.07   0.05      0.07      0.05  0.02
## 24     3   8    0.17   0.17   0.24      0.24      0.17  0.07
## 25     3  12    0.33   0.09   0.12      0.33      0.09  0.24
## 26     3  13    0.27   0.60   0.27      0.60      0.27  0.33
## 27     3  15    0.15   0.29   0.53      0.53      0.15  0.38
## 28     3  16    0.34   0.03   0.04      0.34      0.03  0.31
## 29     3  22    0.00   0.26   0.26      0.26      0.00  0.26
## 30     3  23    0.24   0.24   0.10      0.24      0.10  0.14
## 31     3  24    0.96   0.96   0.28      0.96      0.28  0.68
## 32     4   8    0.10   0.10   0.07      0.10      0.07  0.03
## 33     7  24    0.93   0.93   0.39      0.93      0.39  0.54
## 34    11  22    0.17   0.17   0.09      0.17      0.09  0.08

While the Las Vegas, NV precipitation data are consistent on the 1-hour, 6-hour, and 24-hour measurements, there are significanr differences for Chicago, IL (particularly 1-hour outliers in Jan-Feb-Mar).

For further exploration, another cold weather city (Lincoln, NE) is assessed:

# Run for Lincoln, NE 2016 rainfall
klnkRain2016 <- runFullPrecipExtraction(klnk2016METAR, 
                                        pType="RA", 
                                        titleText="Lincoln, NE Rainfall (hours) in 2016", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2016-09-16 1011", "2016-08-29 2329"),
                                        beginExclude=c("2016-07-13 1301"), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  8 variables:
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8412
## 2 TRUE          0     1   128
## 3 TRUE          1     0   122
## 4 TRUE          1     1   124
## 5 TRUE          1     2    10
## 6 TRUE          2     1    15
## 7 TRUE          2     2     2
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   15.00   33.00   65.57   78.00  522.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Run for Lincoln, NE 2016 snowfall
klnkSnow2016 <- runFullPrecipExtraction(klnk2016METAR, 
                                        pType="SN", 
                                        titleText="Lincoln, NE Snowfall (hours) in 2016", 
                                        yAxisText="Hours of Snow", 
                                        endExclude=c(),
                                        beginExclude=c("2016-01-19 1644"), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8813 obs. of  8 variables:
##  $ origMETAR : chr  "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 10 x 4
##    isPrecip nBegin  nEnd     n
##    <lgl>     <dbl> <dbl> <int>
##  1 FALSE         0     0  8727
##  2 TRUE          0     0     1
##  3 TRUE          0     1    23
##  4 TRUE          1     0    22
##  5 TRUE          1     1    23
##  6 TRUE          1     2     4
##  7 TRUE          2     1     6
##  8 TRUE          2     2     5
##  9 TRUE          2     3     1
## 10 TRUE          3     2     1
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   14.00   27.00   78.63   61.00  814.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Check for interval consistency in the Lincoln, NE 2016 rainfall data
# Weird issue on 2016-07-13 1354 RAB without RAE
# 2016-08-29 2254 TSRA without RAB
# 2016-09-16 0654 many mismatches due to missing RAB here
tmp <- intervalConsistency(klnkRain2016, pType="RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8499    0
##       TRUE      6  308
## 
## Mismatch at time 2016-07-13 13:54 UTC
## [1] "KLNK 131154Z 01003KT 8SM CLR 23/21 A2984 RMK AO2 SLP093 T02280211 10228 20211 53004"         
## [2] "KLNK 131254Z 20006KT 8SM CLR 23/22 A2986 RMK AO2 SLP100 T02280217"                           
## [3] "KLNK 131354Z 22017KT 10SM -RA FEW080 BKN100 24/22 A2983 RMK AO2 RAB01 SLP089 P0000 T02440217"
## [4] "KLNK 131854Z 26005KT 10SM CLR 32/16 A2988 RMK AO2 SLP106 T03170156"                          
## [5] "KLNK 131954Z 22005KT 10SM CLR 32/15 A2987 RMK AO2 SLP102 T03170150"                          
## 
## Mismatch at time 2016-08-29 22:54 UTC
## [1] "KLNK 291954Z 08003KT 10SM VCTS BKN038 BKN048 31/22 A3015 RMK AO2 LTG DSNT SW AND W SLP196 T03060217"                              
## [2] "KLNK 292054Z 35008KT 10SM TS OVC040 28/23 A3013 RMK AO2 LTG DSNT SW AND W TSB46 SLP191 T02830228 55016"                           
## [3] "KLNK 292254Z 35010KT 8SM -TSRA FEW070 BKN100 OVC120 23/20 A3016 RMK AO2 WSHFT 2138 LTG DSNT ALQDS TSE41B45 SLP205 P0021 T02330200"
## [4] "KLNK 292354Z 00000KT 10SM CLR 23/21 A3016 RMK AO2 LTG DSNT ALQDS RAE29 TSE2259 SLP205 P0001 60046 T02330206 10317 20228 51011"    
## [5] "KLNK 300054Z 34006KT 10SM BKN060 BKN070 23/20 A3017 RMK AO2 LTG DSNT E AND SE SLP206 T02280200"                                   
## 
## Mismatch at time 2016-09-16 06:54 UTC
## [1] "KLNK 160354Z 16010KT 10SM CLR 23/20 A2990 RMK AO2 SLP114 T02280200"                                            
## [2] "KLNK 160454Z 18003KT 10SM BKN041 23/21 A2993 RMK AO2 LTG DSNT W-N SLP126 T02280206"                            
## [3] "KLNK 160654Z 36015G25KT 6SM RA BR FEW070 OVC110 16/15 A3000 RMK AO2 LTG DSNT SW TSE0558 SLP149 P0014 T01610150"
## [4] "KLNK 160754Z 07005KT 8SM -RA FEW080 OVC100 16/15 A3001 RMK AO2 SLP154 P0004 T01610150"                         
## [5] "KLNK 160854Z 12006KT 10SM -RA OVC110 16/15 A2996 RMK AO2 SLP137 P0001 60019 T01610150 58024"                   
## 
## Mismatch at time 2016-09-16 07:54 UTC
## [1] "KLNK 160454Z 18003KT 10SM BKN041 23/21 A2993 RMK AO2 LTG DSNT W-N SLP126 T02280206"                            
## [2] "KLNK 160654Z 36015G25KT 6SM RA BR FEW070 OVC110 16/15 A3000 RMK AO2 LTG DSNT SW TSE0558 SLP149 P0014 T01610150"
## [3] "KLNK 160754Z 07005KT 8SM -RA FEW080 OVC100 16/15 A3001 RMK AO2 SLP154 P0004 T01610150"                         
## [4] "KLNK 160854Z 12006KT 10SM -RA OVC110 16/15 A2996 RMK AO2 SLP137 P0001 60019 T01610150 58024"                   
## [5] "KLNK 160954Z 11010KT 10SM -RA FEW065 BKN110 16/16 A2990 RMK AO2 SLP116 P0000 T01610156"                        
## 
## Mismatch at time 2016-09-16 08:54 UTC
## [1] "KLNK 160654Z 36015G25KT 6SM RA BR FEW070 OVC110 16/15 A3000 RMK AO2 LTG DSNT SW TSE0558 SLP149 P0014 T01610150"
## [2] "KLNK 160754Z 07005KT 8SM -RA FEW080 OVC100 16/15 A3001 RMK AO2 SLP154 P0004 T01610150"                         
## [3] "KLNK 160854Z 12006KT 10SM -RA OVC110 16/15 A2996 RMK AO2 SLP137 P0001 60019 T01610150 58024"                   
## [4] "KLNK 160954Z 11010KT 10SM -RA FEW065 BKN110 16/16 A2990 RMK AO2 SLP116 P0000 T01610156"                        
## [5] "KLNK 161054Z 17005KT 10SM CLR 16/15 A2998 RMK AO2 RAE11 SLP141 P0000 T01560150"                                
## 
## Mismatch at time 2016-09-16 09:54 UTC
## [1] "KLNK 160754Z 07005KT 8SM -RA FEW080 OVC100 16/15 A3001 RMK AO2 SLP154 P0004 T01610150"              
## [2] "KLNK 160854Z 12006KT 10SM -RA OVC110 16/15 A2996 RMK AO2 SLP137 P0001 60019 T01610150 58024"        
## [3] "KLNK 160954Z 11010KT 10SM -RA FEW065 BKN110 16/16 A2990 RMK AO2 SLP116 P0000 T01610156"             
## [4] "KLNK 161054Z 17005KT 10SM CLR 16/15 A2998 RMK AO2 RAE11 SLP141 P0000 T01560150"                     
## [5] "KLNK 161154Z 00000KT 10SM SCT012 16/15 A2998 RMK AO2 SLP141 60019 70068 T01560150 10167 20150 50024"
# Check for interval consistency in the Lincoln, NE 2016 rainfall data
tmp <- intervalConsistency(klnkSnow2016, pType="SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8719    0
##       TRUE      0   94
## 
## Full matches between METAR observations and intervals
# Original pass for Lincoln, NE
x5 <- findPrecipTypes(klnkRain2016)
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  2128  6685 
## 
## 
## Multiple Precipitation types in the same record include
## .
##         AUTO CLR           -RA BR           BR CLR           -SN BR 
##             1148               74               56               54 
##          AUTO BR           HZ CLR         AUTO -RA      AUTO BR CLR 
##               47               40               24               24 
##            RA BR      AUTO -RA BR          AUTO FG         VCTS -RA 
##               19               10               10               10 
##       AUTO -TSRA          TSRA BR           +RA BR      AUTO HZ CLR 
##                8                6                5                5 
##         -TSRA BR         +TSRA BR         AUTO -SN      AUTO -SN BR 
##                4                4                4                4 
##          AUTO HZ          AUTO TS        AUTO VCTS      VCTS -RA BR 
##                4                4                4                4 
##       AUTO RA BR          AUTO UP            SN FG            UP BR 
##                3                3                3                3 
##      AUTO +RA BR         -FZRA BR          -RA CLR         -SN FZFG 
##                2                1                1                1 
##        -TSRA CLR           +SN FG         +SN FZFG    AUTO +TSRA BR 
##                1                1                1                1 
##    AUTO +TSRA FG          AUTO RA       AUTO SN FG     AUTO TSRA BR 
##                1                1                1                1 
## AUTO VCTS -RA BR           FU CLR          SN FZFG            TS SQ 
##                1                1                1                1 
##     VCTS -RA CLR      VCTS +RA BR       VCTS RA BR 
##                1                1                1 
## 
## Single Precipitation types in the same record include
## .
##   CLR  AUTO    BR   -RA    HZ   -SN    FG  VCTS    TS -TSRA    UP    SN +TSRA 
##  4275   409   164   117    30    20    14    14    11    10     7     4     3 
##  FZFG 
##     3
# Exclude AUTO and CLR as they are a record state and cloud type, not precipitation
x6 <- findPrecipTypes(klnkRain2016, exclTypes=c("CLR", "AUTO"))
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  7960   853 
## 
## 
## Multiple Precipitation types in the same record include
## .
##      -RA BR      -SN BR       RA BR    VCTS -RA      +RA BR     TSRA BR 
##          84          58          22          11           7           7 
##    +TSRA BR VCTS -RA BR    -TSRA BR       SN FG       UP BR    -FZRA BR 
##           5           5           4           4           3           1 
##    -SN FZFG      +SN FG    +SN FZFG    +TSRA FG     SN FZFG       TS SQ 
##           1           1           1           1           1           1 
## VCTS +RA BR  VCTS RA BR 
##           1           1 
## 
## Single Precipitation types in the same record include
## .
##    BR   -RA    HZ   -SN    FG -TSRA  VCTS    TS    UP    SN +TSRA  FZFG    FU 
##   291   142    79    24    24    19    18    15    10     4     3     3     1 
##    RA 
##     1
# Get the Lincoln, NE 2016 liquid precipitation
klnkPrecip <- extractLiquidPrecipAmounts(klnkRain2016)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.16 
## 8481   98   44   27   25   14   21   15    9    3    7    2    5    6    6    3 
## 0.17 0.18 0.19  0.2 0.21 0.23 0.24 0.25 0.26 0.28 0.29 0.31 0.35 0.37 0.39 0.44 
##    5    2    3    5    5    1    2    3    1    1    1    2    1    2    2    1 
## 0.47 0.48 0.53 0.56 0.64 0.65 0.73 0.77 0.85 1.67 
##    1    1    1    1    1    1    1    1    1    1 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.13 0.14 0.15 0.16 
## 8589   43   20   19   11    6    6   11    2    8    2    4    5    5    1    4 
## 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.26 0.27 0.29 0.31 0.32 0.33 0.35 0.36 
##    2    2    6    8    1    1    3    1    2    2    4    3    2    3    1    1 
## 0.37 0.39  0.4 0.41 0.42 0.46 0.48  0.5 0.51 0.52 0.57 0.66 0.68 0.72 0.73 0.74 
##    2    1    1    2    1    1    1    1    3    1    4    2    2    1    2    1 
## 0.77 0.78 0.91 0.98 1.11  1.2  2.3 2.34 
##    2    1    1    1    1    1    1    1 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.11 0.14 0.15 0.16 0.17 0.19  0.2 
## 8729   12    3    5    4    2    3    3    1    2    2    1    1    1    3    1 
## 0.21 0.24 0.27 0.28 0.29 0.32 0.33 0.34 0.35 0.36 0.44 0.46 0.49  0.5 0.51 0.53 
##    2    1    1    1    1    1    1    1    1    1    1    1    2    1    2    1 
## 0.59 0.62 0.65 0.66 0.68 0.69 0.73 0.74 0.76 0.78 0.85 0.92 0.95 0.98 1.04 1.17 
##    1    1    1    1    1    1    2    1    1    2    1    2    1    1    1    1 
## 1.21 2.34 
##    1    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   0.71      5.35    0.00   5.35   0.00
## 2      1   1.95      0.00    0.00   0.00   0.00
## 3      2   1.68      0.00    0.00   0.00   0.00
## 4      3   0.73      4.36    0.00   0.00   4.36
## 5      4   0.40      0.00    0.00   0.00   0.00
## 6      5   1.87      0.00    0.00   0.00   0.00
## 7      6   2.51      9.14    0.00   9.14   0.00
## 8      7   1.43      0.00    0.00   0.00   0.00
## 9      8   1.94      0.00    0.00   0.00   0.00
## 10     9   1.27      4.72    0.00   0.00   4.72
## 11    10   1.49      0.00    0.00   0.00   0.00
## 12    11   0.83      0.00    0.00   0.00   0.00
## 13    12   1.26      8.29   28.73   8.29   0.00
## 14    13   0.88      0.00    0.00   0.00   0.00
## 15    14   1.49      0.00    0.00   0.00   0.00
## 16    15   0.43      2.80    0.00   0.00   2.80
## 17    16   0.74      0.00    0.00   0.00   0.00
## 18    17   1.21      0.00    0.00   0.00   0.00
## 19    18   0.47      5.46    0.00   5.46   0.00
## 20    19   0.46      0.00    0.00   0.00   0.00
## 21    20   0.94      0.00    0.00   0.00   0.00
## 22    21   1.00      2.40    0.00   0.00   2.40
## 23    22   0.84      0.00    0.00   0.00   0.00
## 24    23   1.16      0.00    0.00   0.00   0.00
# Check for consistency in the Lincoln, NE 2016 precipitation data
checkPrecipConsistency(klnkPrecip, title="Lincoln, NE 2016 Precipitation by Month", yearsUse=2016)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2015    12    30    12       0      0      0
## 2  2015    12    31    24       0      0      0
## 3  2016     1     1    24       0      0      0
## 4  2016     1     2    24       0      0      0
## 5  2016     1     3    24       0      0      0
## 6  2016     1     4    24       0      0      0
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2016-01   0.82   0.82    0.83
##  2 2016-02   0.64   0.64    0.64
##  3 2016-03   0.96   0.96    0.96
##  4 2016-04   4.37   4.37    4.37
##  5 2016-05   5.39   5.39    5.39
##  6 2016-06   0.580  0.580   0.5 
##  7 2016-07   4.67   4.67    4.43
##  8 2016-08   3.83   3.83    3.59
##  9 2016-09   3.38   2.89    2.89
## 10 2016-10   1.86   1.86    1.86
## 11 2016-11   0.65   0.65    0.65
## 12 2016-12   1.58   1.58    1.58
## p24Hour  p6Hour  p1Hour 
##   28.73   28.24   27.69

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.08 0.24 0.49 
##  364    1    1    2    1 
## 
## 
## Mismatch days of worse than maxDelta inches include
##   month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1     6  17    0.50   0.50   0.42      0.50      0.42  0.08
## 2     7   2    0.85   0.85   0.61      0.85      0.61  0.24
## 3     8  29    0.49   0.49   0.25      0.49      0.25  0.24
## 4     9  15    0.68   0.19   0.19      0.68      0.19  0.49

There are mismatches on 4 days of 2016 in the Lincoln, NE data though these appear to be manual error rather than systematic error due to cold weather. Perhaps consulting official weather records can help determine the true liquid precipitation o the days in question.

Example #34: Checking METAR Data for Gaps and Problems

Perhaps some of the inconsistencies in METAR data are driven by missing observations or sensor anomalies. Missing observations can be detected as records expected but not detected, while sensor anomalies are flagged by a trailing ‘$’ in the METAR record.

Example code includes:

checkGapsAnomalies <- function(lst, minDay, maxDay, loc, hour24=12) {
    
    # Pull the METAR data and datetime
    metar <- lst[["testFileProc"]][["origMETAR"]]
    dtime <- lst[["testFileProc"]][["dtime"]]
    
    # Create analysis data frame
    dfUse <- tibble::tibble(metar=metar, dtime=dtime) %>%
        mutate(dtUse=dtime-lubridate::hours(hour24), 
               year=lubridate::year(dtUse), 
               month=lubridate::month(dtUse), 
               day=lubridate::day(dtUse), 
               ym=paste0(year, "-", str_pad(month, width=2, side="left", pad="0")),
               isAnomaly=grepl("\\$$", metar),
               n=1
               )
    
    cat("\nData file with new time and anomaly variable\n")
    dim(dfUse) %>% print()
    names(dfUse) %>% print()
    
    # Keep only days between minDay and maxDay inclusive
    dfUse <- dfUse %>%
        filter(lubridate::date(dtUse) >= as.Date(minDay), lubridate::date(dtUse) <= as.Date(maxDay))
    
    cat("\nData file filtered to include only desired times\n")
    dim(dfUse) %>% print()
    names(dfUse) %>% print()
    
    # Sum n and isAnomaly by day
    dfIssue <- dfUse %>%
        group_by(ym, day) %>%
        summarize(n=sum(n), anomaly=sum(isAnomaly)) %>%
        ungroup() %>%
        mutate(missObs=24-n)
    summary(dfIssue) %>% print()
    
    # Plot summary by month
    dfPlot <- dfIssue %>%
        group_by(ym) %>%
        summarize(missObsDays=sum(missObs > 0), missObs=sum(missObs), 
                  anomalyDays=sum(anomaly > 0), anomaly=sum(anomaly), 
                  nDays=n()
                  ) %>%
        ungroup()
    
    # Create plot of missing observations
    p1 <- dfPlot %>%
        ggplot(aes(x=factor(ym), y=missObs)) + 
        geom_line(aes(group=1), lwd=2, color="red") + 
        geom_text(aes(y=missObs+0.5, label=ifelse(missObs>0, missObs, "")), color="red") + 
        labs(x="", y="Missing METAR Observations in Month", 
             title=paste0(loc, " Missing METAR Observations by Month")
             ) + 
        ylim(0, NA)
    print(p1)
    
    # Create plot of missing observations
    p2 <- dfPlot %>%
        ggplot(aes(x=factor(ym), y=anomaly)) + 
        geom_line(aes(group=1), lwd=2, color="blue") + 
        labs(x="", y="Potentially Anomalous (trailing $) METAR Observations", 
             title=paste0(loc, " Potentially Anomalous METAR Observations by Month")
             ) + 
        ylim(0, NA)
    print(p2)
    
    # Return the plotting frame
    dfPlot
}

checkGapsAnomalies(klasRain2016, minDay="2016-01-01", maxDay="2016-12-31", loc="Las Vegas, NV (2016)")
## 
## Data file with new time and anomaly variable
## [1] 8818    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8770    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly      
##  Length:366         Min.   : 1.00   Min.   :20.00   Min.   : 0.000  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.000  
##  Mode  :character   Median :16.00   Median :24.00   Median : 0.000  
##                     Mean   :15.76   Mean   :23.96   Mean   : 2.653  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.: 1.000  
##                     Max.   :31.00   Max.   :24.00   Max.   :24.000  
##     missObs       
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.03825  
##  3rd Qu.:0.00000  
##  Max.   :4.00000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2016-01           0       0          11      85    31
##  2 2016-02           0       0           5      40    29
##  3 2016-03           0       0          15     117    31
##  4 2016-04           0       0          12     120    30
##  5 2016-05           0       0           5      71    31
##  6 2016-06           1       2           1       4    30
##  7 2016-07           1       4           7      84    31
##  8 2016-08           3       5           5      57    31
##  9 2016-09           0       0          17     200    30
## 10 2016-10           0       0           2      18    31
## 11 2016-11           1       1           4      15    30
## 12 2016-12           1       2          15     160    31
checkGapsAnomalies(kordRain2016, minDay="2016-01-01", maxDay="2016-12-31", loc="Chicago, IL (2016)")
## 
## Data file with new time and anomaly variable
## [1] 8815    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8767    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly      
##  Length:366         Min.   : 1.00   Min.   :20.00   Min.   : 0.000  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.000  
##  Mode  :character   Median :16.00   Median :24.00   Median : 0.000  
##                     Mean   :15.76   Mean   :23.95   Mean   : 3.385  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.: 1.000  
##                     Max.   :31.00   Max.   :24.00   Max.   :24.000  
##     missObs       
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.04645  
##  3rd Qu.:0.00000  
##  Max.   :4.00000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2016-01           0       0          10      96    31
##  2 2016-02           0       0           5      31    29
##  3 2016-03           0       0          13     201    31
##  4 2016-04           1       1           9      93    30
##  5 2016-05           0       0          10     182    31
##  6 2016-06           3       4          15     177    30
##  7 2016-07           2       5           7      96    31
##  8 2016-08           2       2           8      55    31
##  9 2016-09           0       0           1       7    30
## 10 2016-10           0       0           6      75    31
## 11 2016-11           4       4          11     133    30
## 12 2016-12           1       1          10      93    31
checkGapsAnomalies(klnkRain2016, minDay="2016-01-01", maxDay="2016-12-31", loc="Lincoln, NE (2016)")
## 
## Data file with new time and anomaly variable
## [1] 8813    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8765    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly       
##  Length:366         Min.   : 1.00   Min.   :20.00   Min.   : 0.0000  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.0000  
##  Mode  :character   Median :16.00   Median :24.00   Median : 0.0000  
##                     Mean   :15.76   Mean   :23.95   Mean   : 0.2678  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.: 0.0000  
##                     Max.   :31.00   Max.   :24.00   Max.   :16.0000  
##     missObs       
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.05191  
##  3rd Qu.:0.00000  
##  Max.   :4.00000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2016-01           1       1           0       0    31
##  2 2016-02           0       0           4      21    29
##  3 2016-03           0       0           1       6    31
##  4 2016-04           0       0           1       2    30
##  5 2016-05           2       2           3      29    31
##  6 2016-06           1       3           2       6    30
##  7 2016-07           3       6           0       0    31
##  8 2016-08           3       3           3       8    31
##  9 2016-09           1       2           3       8    30
## 10 2016-10           0       0           2       6    31
## 11 2016-11           1       1           0       0    30
## 12 2016-12           1       1           4      12    31

The Chicago, IL and Las Vegas, NV sensors self-identify as anomalous roughly an order of magnitude more often than the Lincoln, NE sensor. There are no obvious spikes for Chicago, IL in Q1 2016, suggesting mismatched precipitation may be due to causes other than anomalous sensors and missing data.

Example #35: Checking Other Time Periods and Locales

The precipitation discrepancies in the Lincoln. NE data appear to be one-off while there are no material preipitation discrepancies in the Las Vegas, NV data. However, Q1 2016 is filled with precipitation discrepancies in the Chicago, IL data. Is this just a Q1 2016 problem, or something related to large midwestern cities more generally, or a specific recurring Q1 issue in Chicago?

Pulling data for Minneapolis, MN and Detroit, MI for 2016 can help address whether there is a general Q1 2016 issue in large midwestern cities. Pulling data for Chicago, IL in 2015 and 2017 can help address whether there is something specific and recurring to Chicago in Q1.

Example code includes:

# Data pulls are cached to avoid over-using the Iowa State servers

# Get data for ORD for 2015
getASOSStationTime(stationID="ORD", analysisYears=2015, ovrWrite=TRUE)
## 
## Data for station ORD from 2014-12-31 to 2016-01-02 will download to ./RInputFiles/metar_kord_2015.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=ORD&data=all&year1=2014&month1=12&day1=31&year2=2016&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kord_2015.txt
## [1] TRUE
# Get data for ORD for 2017
getASOSStationTime(stationID="ORD", analysisYears=2017, ovrWrite=TRUE)
## 
## Data for station ORD from 2016-12-31 to 2018-01-02 will download to ./RInputFiles/metar_kord_2017.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=ORD&data=all&year1=2016&month1=12&day1=31&year2=2018&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kord_2017.txt
## [1] TRUE
# Get data for MSP for 2016
getASOSStationTime(stationID="MSP", analysisYears=2016, ovrWrite=TRUE)
## 
## Data for station MSP from 2015-12-31 to 2017-01-02 will download to ./RInputFiles/metar_kmsp_2016.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=MSP&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kmsp_2016.txt
## [1] TRUE
# Get data for DTW for 2016
getASOSStationTime(stationID="DTW", analysisYears=2016, ovrWrite=TRUE)
## 
## Data for station DTW from 2015-12-31 to 2017-01-02 will download to ./RInputFiles/metar_kdtw_2016.txt 
## 
## Downloading from: https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=DTW&data=all&year1=2015&month1=12&day1=31&year2=2017&month2=1&day2=2&tz=Etc%2FUTC&format=onlycomma&latlon=no&missing=M&trace=T&direct=no&report_type=2 
## Downloading to: ./RInputFiles/metar_kdtw_2016.txt
## [1] TRUE

Create the base METAR file for Chicago, IL 2015 data:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_kord_2015.txt"  # file name for raw METAR data
timeZ <- "51Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2014-12-31 00:51:00", tz="UTC")  # Expected first time read
expDays <- 367  # Expected total days read
locMET <- "Chicago, IL"  # Description of city or location
shortMET <- "KORD METAR (2015)"  # Station code and timing
longMET <- "Chicago, IL O'Hare Hourly METAR (2015)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Chicago, IL (2015)
kord2015METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_character(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10997 obs. of  29 variables:
##  $ station          : chr  "ORD" "ORD" "ORD" "ORD" ...
##  $ valid            : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ tmpf             : num  14 14 12.9 12 10.9 ...
##  $ dwpf             : num  -2.92 -2.92 -2.02 -2.92 -5.08 -5.08 -4 -5.98 -5.98 -7.06 ...
##  $ relh             : num  46.2 46.2 50.5 50.4 47.7 ...
##  $ drct             : num  300 300 300 280 290 290 290 300 280 270 ...
##  $ sknt             : num  7 10 7 10 10 8 8 9 10 9 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.6 30.6 30.6 30.6 30.6 ...
##  $ mslp             : num  1038 1038 1038 1038 1038 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "SCT" "FEW" "SCT" "SCT" ...
##  $ skyc2            : chr  NA NA NA "BKN" ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : chr  NA NA NA NA ...
##  $ skyl1            : num  25000 25000 25000 19000 19000 NA NA NA NA NA ...
##  $ skyl2            : num  NA NA NA 25000 25000 NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  2.84 0.4 1.52 -2.09 -3.44 -2.94 -4.27 -6.25 -8.41 -8.72 ...
##  $ metar            : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2015-01-21 08:51:00 UTC" "2015-02-25 21:51:00 UTC"
##  [3] "2015-03-08 23:51:00 UTC" "2015-03-15 16:51:00 UTC"
##  [5] "2015-03-23 16:51:00 UTC" "2015-04-16 17:51:00 UTC"
##  [7] "2015-05-17 12:51:00 UTC" "2015-06-17 04:51:00 UTC"
##  [9] "2015-06-17 05:51:00 UTC" "2015-06-17 06:51:00 UTC"
## [11] "2015-06-17 07:51:00 UTC" "2015-06-17 08:51:00 UTC"
## [13] "2015-06-17 09:51:00 UTC" "2015-06-21 03:51:00 UTC"
## [15] "2015-06-21 04:51:00 UTC" "2015-06-29 00:51:00 UTC"
## [17] "2015-07-01 04:51:00 UTC" "2015-08-24 00:51:00 UTC"
## [19] "2015-08-24 01:51:00 UTC" "2015-09-23 18:51:00 UTC"
## [21] "2015-10-03 02:51:00 UTC" "2015-10-03 03:51:00 UTC"
## [23] "2015-10-03 04:51:00 UTC" "2015-10-03 05:51:00 UTC"
## [25] "2015-10-03 06:51:00 UTC" "2015-10-03 08:51:00 UTC"
## [27] "2015-11-01 00:51:00 UTC" "2015-11-01 01:51:00 UTC"
## [29] "2015-11-01 02:51:00 UTC" "2015-11-01 03:51:00 UTC"
## [31] "2015-11-01 04:51:00 UTC" "2015-12-14 14:51:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "30007KT" "300" "07" NA  
## [2,] "30010KT" "300" "10" NA  
## [3,] "30007KT" "300" "07" NA  
## [4,] "28010KT" "280" "10" NA  
## [5,] "29010KT" "290" "10" NA  
## [6,] "29008KT" "290" "08" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  518  160  256  275  271  268  241  196  135  152  105   56   49   82  159  151 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  154  215  325  358  387  356  353  275  346  413  372  282  311  234  207  197 
##  320  330  340  350  360  VRB <NA> 
##  181  169  150  150  162  102    3 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  518  463  626  751  822  843  774  725  599  517  448  383  297  239  216  159 
##   18   19   20   21   22   23   24   25   26   27   28   30 <NA> 
##  123   88   62   43   26   12   13   12    9    3    1    1    3 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    8   18   32   51   74  111  135  123  142  122  109   95   71   76   54   51 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G41  G43  G45 <NA> 
##   32   19   17   17    8    3   10    6    5    1    1    1    1 7383 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 3 x 1
##   metar                                                                         
##   <chr>                                                                         
## 1 KORD 120851Z 4SM -RA BR SCT005 SCT032 BKN090 OVC150 16/15 A2971 RMK AO2 TSE43~
## 2 KORD 271451Z 10SM SCT031 18/11 A3020 RMK AO2 SLP226 T01830111 58005           
## 3 KORD 201551Z 10SM SCT036 SCT250 BKN350 19/11 A3021 RMK AO2 SLP230 T01940106

## Warning: Removed 3 rows containing non-finite values (stat_count).

## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##     8  8768 
## 
## *** Data Not Matched *** 
## [1] "KORD 111851Z 30014KT 10SM OVC028 02/-04 A2999 RMK SLP162 T00171044"                                                                              
## [2] "KORD 111951Z 31015KT 10SM OVC023 01/M04 RMK AO2 SLPNO T00061039 $"                                                                               
## [3] "KORD 120851Z 4SM -RA BR SCT005 SCT032 BKN090 OVC150 16/15 A2971 RMK AO2 TSE43 SLP059 OCNL LTGICCC DSNT NE CB DSNT NE P0004 60004 T01560150 58008"
## [4] "KORD 051551Z 21008KT 10SM SCT040 26/15 A3008"                                                                                                    
## [5] "KORD 130251Z 14008KT 10SM SCT033 23/19 A2992"                                                                                                    
## [6] "KORD 271451Z 10SM SCT031 18/11 A3020 RMK AO2 SLP226 T01830111 58005"                                                                             
## [7] "KORD 201551Z 10SM SCT036 SCT250 BKN350 19/11 A3021 RMK AO2 SLP230 T01940106"                                                                     
## [8] "KORD 160151Z 32005KT 10SM CLR 10/M01 A3010 RMK AO2 SLPNO T01001011"                                                                              
## 
## *** Parsing matrix summary *** 
## [1] 8776   11
##      [,1]                                                                   
## [1,] "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194"       
## [2,] "51Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194"       
## [3,] "51Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189"       
## [4,] "51Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194"
## [5,] "51Z 29010KT 10SM FEW190 SCT250 M12/M21 A3060 RMK AO2 SLP378 T11171206"
## [6,] "51Z 29008KT 10SM CLR M12/M21 A3059 RMK AO2 SLP374 T11221206"          
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "300" "07" NA   " "  "10SM" "M10" "M19" "A3061" "SLP380" "T11001194"
## [2,] "300" "10" NA   " "  "10SM" "M10" "M19" "A3062" "SLP383" "T11001194"
## [3,] "300" "07" NA   " "  "10SM" "M11" "M19" "A3060" "SLP378" "T11061189"
## [4,] "280" "10" NA   " "  "10SM" "M11" "M19" "A3061" "SLP378" "T11111194"
## [5,] "290" "10" NA   " "  "10SM" "M12" "M21" "A3060" "SLP378" "T11171206"
## [6,] "290" "08" NA   " "  "10SM" "M12" "M21" "A3059" "SLP374" "T11221206"
## 
## *** Summary of the parsed data *** 
## Observations: 8,776
## Variables: 13
## $ METAR      <chr> "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T1...
## $ WindDir    <chr> "300", "300", "300", "280", "290", "290", "290", "300", ...
## $ WindSpeed  <chr> "07", "10", "07", "10", "10", "08", "08", "09", "10", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M10", "M10", "M11", "M11", "M12", "M12", "M13", "M13", ...
## $ DewC       <chr> "M19", "M19", "M19", "M19", "M21", "M21", "M20", "M21", ...
## $ Altimeter  <chr> "A3061", "A3062", "A3060", "A3061", "A3060", "A3059", "A...
## $ SLP        <chr> "SLP380", "SLP383", "SLP378", "SLP378", "SLP378", "SLP37...
## $ FahrC      <chr> "T11001194", "T11001194", "T11061189", "T11111194", "T11...
## $ dtime      <dttm> 2014-12-31 00:51:00, 2014-12-31 01:51:00, 2014-12-31 02...
## $ origMETAR  <chr> "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 ...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  15 variables:
##  $ METAR     : chr  "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "51Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "51Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189" "51Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ WindDir   : chr  "300" "300" "300" "280" ...
##  $ WindSpeed : int  7 10 7 10 10 8 8 9 10 9 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -10 -10 -11 -11 -12 -12 -13 -13 -14 -14 ...
##  $ DewC      : int  -19 -19 -19 -19 -21 -21 -20 -21 -21 -22 ...
##  $ Altimeter : int  3061 3062 3060 3061 3060 3059 3058 3058 3056 3054 ...
##  $ SLP       : int  380 383 378 378 378 374 371 368 363 357 ...
##  $ FahrC     : chr  "T11001194" "T11001194" "T11061189" "T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ TempF     : num  14 14 12.9 12 10.9 ...
##  $ DewF      : num  -2.92 -2.92 -2.02 -2.92 -5.08 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 300             7       NA " "           10   -10   -19      3061   380
## 2 51Z ~ 300            10       NA " "           10   -10   -19      3062   383
## 3 51Z ~ 300             7       NA " "           10   -11   -19      3060   378
## 4 51Z ~ 280            10       NA " "           10   -11   -19      3061   378
## 5 51Z ~ 290            10       NA " "           10   -12   -21      3060   378
## 6 51Z ~ 290             8       NA " "           10   -12   -21      3059   374
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 250            18       NA " "           10    -3    -7      3014   215
## 2 51Z ~ 260            14       NA " "           10    -3    -7      3013   212
## 3 51Z ~ 220            13       NA " "           10    -3    -7      3011   207
## 4 51Z ~ 240            12       NA " "           10    -2    -6      3011   205
## 5 51Z ~ 230            13       NA " "           10    -2    -6      3011   206
## 6 51Z ~ 230            12       NA " "           10    -1    -6      3010   202
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 51Z ~ 240             3       NA " "           10    22    16      3014   203
##  2 51Z ~ 250            12       NA " "           10    22    14      2993   132
##  3 51Z ~ 160             9       NA " "            8     7     4      2981    97
##  4 51Z ~ 040            10       NA " "           10     6    -1      2999   158
##  5 51Z ~ 130            16       NA " "            8     6     4      2970    62
##  6 51Z ~ 340             5       NA " "           10    22    16      3004   167
##  7 51Z ~ 320            14       NA " "           10    18     8      2996   145
##  8 51Z ~ 280            10       NA " "            3     1    -1      2984   112
##  9 51Z ~ 030            15       NA " "           10    18    14      3000   156
## 10 51Z ~ 180             5       NA " "           10    17    14      3006   175
## 11 51Z ~ 180             5       NA " "           10    14    11      3022   229
## 12 51Z ~ 050             7       NA " "           10     5    -1      3023   240
## 13 51Z ~ 260            23       NA " "           10     5     1      2951   993
## 14 51Z ~ 050            20       NA " "           10    12     4      3019   227
## 15 51Z ~ 250            13       NA " "           10     3    -1      2967    52
## 16 51Z ~ 090             7       NA " 05~         10    17    11      3017   217
## 17 51Z ~ 210            10       NA " "           10    21     8      2997   145
## 18 51Z ~ 210            11       NA " "           10   -21   -24      3036   302
## 19 51Z ~ 240            13       NA " "           10     3    -1      2995   147
## 20 51Z ~ 040            14       NA " "           10    17    12      3014   203
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##          8          8          8       8776          8          8          8 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##          8          8          8          8          0          0          8 
##       DewF 
##          8

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 19 x 2
##    Visibility     n
##         <dbl> <int>
##  1      0.125     9
##  2      0.25     40
##  3      0.5      59
##  4      0.75     42
##  5      1        32
##  6      1.25     24
##  7      1.5      57
##  8      1.75     22
##  9      2        88
## 10      2.5      70
## 11      3       123
## 12      4       136
## 13      5       161
## 14      6       171
## 15      7       192
## 16      8       240
## 17      9       312
## 18     10      6990
## 19     NA         8
##    WindGust    n
## 1        14    6
## 2        15   16
## 3        16   32
## 4        17   51
## 5        18   74
## 6        19  111
## 7        20  135
## 8        21  123
## 9        22  142
## 10       23  122
## 11       24  109
## 12       25   95
## 13       26   71
## 14       27   76
## 15       28   54
## 16       29   51
## 17       30   32
## 18       31   19
## 19       32   17
## 20       33   17
## 21       34    8
## 22       35    3
## 23       36   10
## 24       37    6
## 25       38    5
## 26       39    1
## 27       41    1
## 28       43    1
## 29       45    1
## 30       NA 7387
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8768 complete cases (99.9% of 8776 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.93  0.93     -0.41  -0.47     -0.09       0.13
## TempF       1.00  1.00  0.93  0.93     -0.41  -0.47     -0.09       0.13
## DewC        0.93  0.93  1.00  1.00     -0.47  -0.52     -0.17      -0.03
## DewF        0.93  0.93  1.00  1.00     -0.47  -0.52     -0.17      -0.03
## Altimeter  -0.41 -0.41 -0.47 -0.47      1.00   1.00     -0.21       0.11
## modSLP     -0.47 -0.47 -0.52 -0.52      1.00   1.00     -0.19       0.10
## WindSpeed  -0.09 -0.09 -0.17 -0.17     -0.21  -0.19      1.00       0.04
## Visibility  0.13  0.13 -0.03 -0.03      0.11   0.10      0.04       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0624 -0.4345 -0.1136  0.4120  1.8678 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.744e+01  7.783e-01  -48.11   <2e-16 ***
## Altimeter    3.512e-01  2.591e-04 1355.49   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5353 on 8766 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9953, Adjusted R-squared:  0.9953 
## F-statistic: 1.837e+06 on 1 and 8766 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74962 -0.13024  0.00199  0.12630  0.64341 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.6218769  0.2950506  -19.05   <2e-16 ***
## Altimeter    0.3410119  0.0000975 3497.55   <2e-16 ***
## TempF       -0.0264150  0.0001033 -255.76   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.184 on 8765 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9994 
## F-statistic: 7.807e+06 on 2 and 8765 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      1 1616
## 2      0    0      0      0      1      0  602
## 3      0    0      0      0      1      1  537
## 4      0    0      0      0      2      0  217
## 5      0    0      0      0      2      1  136
## 6      0    0      0      0      3      0   45
## 7      0    0      0      0      3      1    3
## 8      0    0      0      1      0      0  404
## 9      0    0      0      1      0      1  142
## 10     0    0      0      1      1      0  236
## 11     0    0      0      1      1      1  204
## 12     0    0      0      1      2      0  109
## 13     0    0      0      1      2      1   24
## 14     0    0      0      1      3      0   12
## 15     0    0      0      2      0      0  115
## 16     0    0      0      2      0      1   21
## 17     0    0      0      2      1      0   53
## 18     0    0      0      2      1      1   11
## 19     0    0      0      2      2      0   15
## 20     0    0      0      2      2      1    1
## 21     0    0      0      3      0      0    9
## 22     0    0      0      3      0      1    1
## 23     0    0      0      3      1      0    3
## 24     0    0      1      0      0      0 1056
## 25     0    0      1      0      0      1  182
## 26     0    0      1      0      1      0  314
## 27     0    0      1      0      1      1  216
## 28     0    0      1      0      2      0   88
## 29     0    0      1      0      2      1   27
## 30     0    0      1      0      3      0   11
## 31     0    0      1      1      0      0  301
## 32     0    0      1      1      0      1  107
## 33     0    0      1      1      1      0  182
## 34     0    0      1      1      1      1   54
## 35     0    0      1      1      2      0   37
## 36     0    0      1      1      2      1    3
## 37     0    0      1      2      0      0   40
## 38     0    0      1      2      0      1    9
## 39     0    0      1      2      1      0   24
## 40     0    0      1      2      1      1    2
## 41     0    0      1      3      0      0    2
## 42     0    0      2      0      0      0  343
## 43     0    0      2      0      0      1   27
## 44     0    0      2      0      1      0   62
## 45     0    0      2      0      1      1   19
## 46     0    0      2      0      2      0    7
## 47     0    0      2      0      2      1    1
## 48     0    0      2      1      0      0   87
## 49     0    0      2      1      0      1    3
## 50     0    0      2      1      1      0   23
## 51     0    0      2      1      2      0    1
## 52     0    0      2      2      0      0    6
## 53     0    0      3      0      0      0    4
## 54     0    1      0      0      0      0   71
## 55     1    0      0      0      0      0  951
## 
## *** METAR records where no clouds were extracted ***
## character(0)

## 
## *** Dimensions for the cloud matrix ***
## [1] 8776   12
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8776 obs. of  7 variables:
##   ..$ isCLR : num [1:8776] 0 0 0 0 0 1 1 1 1 1 ...
##   ..$ isVV  : num [1:8776] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8776] 0 1 0 0 1 0 0 0 0 0 ...
##   ..$ numSCT: int [1:8776] 1 0 1 1 1 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8776] 0 0 0 1 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8776] 0 0 0 0 0 0 0 0 0 0 ...
##  $ mtxCloud : chr [1:8776, 1:12] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,776 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA     NA     NA  25000     NA
##  2    NA     NA     NA     NA  25000
##  3    NA     NA     NA  25000     NA
##  4    NA     NA  25000  19000     NA
##  5    NA     NA     NA  25000  19000
##  6    NA     NA     NA     NA     NA
##  7    NA     NA     NA     NA     NA
##  8    NA     NA     NA     NA     NA
##  9    NA     NA     NA     NA     NA
## 10    NA     NA     NA     NA     NA
## # ... with 8,766 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8776 obs. of  5 variables:
##   ..$ lowVV : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowBKN: num [1:8776] NA NA NA 25000 NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8776] 25000 NA 25000 19000 25000 NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8776] NA 25000 NA NA 19000 NA NA NA NA NA ...
##  $ minCeilingLevel: num [1:8776] 999999 999999 999999 25000 999999 ...
##  $ minCloudLevel  : num [1:8776] 25000 25000 25000 19000 19000 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  30 variables:
##  $ METAR     : chr  "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "51Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "51Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189" "51Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ WindDir   : chr  "300" "300" "300" "280" ...
##  $ WindSpeed : int  7 10 7 10 10 8 8 9 10 9 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -10 -10 -11 -11 -12 -12 -13 -13 -14 -14 ...
##  $ DewC      : int  -19 -19 -19 -19 -21 -21 -20 -21 -21 -22 ...
##  $ Altimeter : int  3061 3062 3060 3061 3060 3059 3058 3058 3056 3054 ...
##  $ SLP       : int  380 383 378 378 378 374 371 368 363 357 ...
##  $ FahrC     : chr  "T11001194" "T11001194" "T11061189" "T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ TempF     : num  14 14 12.9 12 10.9 ...
##  $ DewF      : num  -2.92 -2.92 -2.02 -2.92 -5.08 ...
##  $ modSLP    : num  1038 1038 1038 1038 1038 ...
##  $ isCLR     : num  0 0 0 0 0 1 1 1 1 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 1 0 0 1 0 0 0 0 0 ...
##  $ numSCT    : int  1 0 1 1 1 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ numOVC    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowBKN    : num  NA NA NA 25000 NA NA NA NA NA NA ...
##  $ lowSCT    : num  25000 NA 25000 19000 25000 NA NA NA NA NA ...
##  $ lowFEW    : num  NA 25000 NA NA 19000 NA NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 4 5 4 3 4 6 6 6 6 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(kord2015METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8776 obs. of  30 variables:
##   ..$ METAR     : chr [1:8776] "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "51Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "51Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189" "51Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ WindDir   : chr [1:8776] "300" "300" "300" "280" ...
##   ..$ WindSpeed : int [1:8776] 7 10 7 10 10 8 8 9 10 9 ...
##   ..$ WindGust  : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8776] " " " " " " " " ...
##   ..$ Visibility: num [1:8776] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8776] -10 -10 -11 -11 -12 -12 -13 -13 -14 -14 ...
##   ..$ DewC      : int [1:8776] -19 -19 -19 -19 -21 -21 -20 -21 -21 -22 ...
##   ..$ Altimeter : int [1:8776] 3061 3062 3060 3061 3060 3059 3058 3058 3056 3054 ...
##   ..$ SLP       : int [1:8776] 380 383 378 378 378 374 371 368 363 357 ...
##   ..$ FahrC     : chr [1:8776] "T11001194" "T11001194" "T11061189" "T11111194" ...
##   ..$ dtime     : POSIXct[1:8776], format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8776] "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ TempF     : num [1:8776] 14 14 12.9 12 10.9 ...
##   ..$ DewF      : num [1:8776] -2.92 -2.92 -2.02 -2.92 -5.08 ...
##   ..$ modSLP    : num [1:8776] 1038 1038 1038 1038 1038 ...
##   ..$ isCLR     : num [1:8776] 0 0 0 0 0 1 1 1 1 1 ...
##   ..$ isVV      : num [1:8776] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8776] 0 1 0 0 1 0 0 0 0 0 ...
##   ..$ numSCT    : int [1:8776] 1 0 1 1 1 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8776] 0 0 0 1 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8776] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ lowVV     : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowBKN    : num [1:8776] NA NA NA 25000 NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8776] 25000 NA 25000 19000 25000 NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8776] NA 25000 NA NA 19000 NA NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 4 5 4 3 4 6 6 6 6 6 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8776 obs. of  29 variables:
##   ..$ station          : chr [1:8776] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8776], format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8776] 14 14 12.9 12 10.9 ...
##   ..$ dwpf             : num [1:8776] -2.92 -2.92 -2.02 -2.92 -5.08 -5.08 -4 -5.98 -5.98 -7.06 ...
##   ..$ relh             : num [1:8776] 46.2 46.2 50.5 50.4 47.7 ...
##   ..$ drct             : num [1:8776] 300 300 300 280 290 290 290 300 280 270 ...
##   ..$ sknt             : num [1:8776] 7 10 7 10 10 8 8 9 10 9 ...
##   ..$ p01i             : chr [1:8776] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8776] 30.6 30.6 30.6 30.6 30.6 ...
##   ..$ mslp             : num [1:8776] 1038 1038 1038 1038 1038 ...
##   ..$ vsby             : num [1:8776] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8776] "SCT" "FEW" "SCT" "SCT" ...
##   ..$ skyc2            : chr [1:8776] NA NA NA "BKN" ...
##   ..$ skyc3            : chr [1:8776] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8776] NA NA NA NA ...
##   ..$ skyl1            : num [1:8776] 25000 25000 25000 19000 19000 NA NA NA NA NA ...
##   ..$ skyl2            : num [1:8776] NA NA NA 25000 25000 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8776] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8776] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8776] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8776] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8776], format: NA NA ...
##   ..$ feel             : num [1:8776] 2.84 0.4 1.52 -2.09 -3.44 -2.94 -4.27 -6.25 -8.41 -8.72 ...
##   ..$ metar            : chr [1:8776] "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_character(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_double(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_character(),
##   .. ..   ice_accretion_3hr = col_character(),
##   .. ..   ice_accretion_6hr = col_character(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8776 obs. of  32 variables:
##   ..$ station          : chr [1:8776] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8776], format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8776] 14 14 12.9 12 10.9 ...
##   ..$ dwpf             : num [1:8776] -2.92 -2.92 -2.02 -2.92 -5.08 -5.08 -4 -5.98 -5.98 -7.06 ...
##   ..$ relh             : num [1:8776] 46.2 46.2 50.5 50.4 47.7 ...
##   ..$ drct             : num [1:8776] 300 300 300 280 290 290 290 300 280 270 ...
##   ..$ sknt             : num [1:8776] 7 10 7 10 10 8 8 9 10 9 ...
##   ..$ p01i             : chr [1:8776] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8776] 30.6 30.6 30.6 30.6 30.6 ...
##   ..$ mslp             : num [1:8776] 1038 1038 1038 1038 1038 ...
##   ..$ vsby             : num [1:8776] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8776] "SCT" "FEW" "SCT" "SCT" ...
##   ..$ skyc2            : chr [1:8776] NA NA NA "BKN" ...
##   ..$ skyc3            : chr [1:8776] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8776] NA NA NA NA ...
##   ..$ skyl1            : num [1:8776] 25000 25000 25000 19000 19000 NA NA NA NA NA ...
##   ..$ skyl2            : num [1:8776] NA NA NA 25000 25000 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8776] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8776] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8776] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8776] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8776], format: NA NA ...
##   ..$ feel             : num [1:8776] 2.84 0.4 1.52 -2.09 -3.44 -2.94 -4.27 -6.25 -8.41 -8.72 ...
##   ..$ metar            : chr [1:8776] "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ dirW             : chr [1:8776] "300" "300" "300" "280" ...
##   ..$ spdW             : num [1:8776] 7 10 7 10 10 8 8 9 10 9 ...
##   ..$ gustW            : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8776 obs. of  13 variables:
##   ..$ METAR     : chr [1:8776] "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "51Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "51Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189" "51Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ WindDir   : chr [1:8776] "300" "300" "300" "280" ...
##   ..$ WindSpeed : chr [1:8776] "07" "10" "07" "10" ...
##   ..$ WindGust  : chr [1:8776] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8776] " " " " " " " " ...
##   ..$ Visibility: chr [1:8776] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8776] "M10" "M10" "M11" "M11" ...
##   ..$ DewC      : chr [1:8776] "M19" "M19" "M19" "M19" ...
##   ..$ Altimeter : chr [1:8776] "A3061" "A3062" "A3060" "A3061" ...
##   ..$ SLP       : chr [1:8776] "SLP380" "SLP383" "SLP378" "SLP378" ...
##   ..$ FahrC     : chr [1:8776] "T11001194" "T11001194" "T11061189" "T11111194" ...
##   ..$ dtime     : POSIXct[1:8776], format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8776] "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8776 obs. of  15 variables:
##   ..$ METAR     : chr [1:8776] "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "51Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "51Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189" "51Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ WindDir   : chr [1:8776] "300" "300" "300" "280" ...
##   ..$ WindSpeed : int [1:8776] 7 10 7 10 10 8 8 9 10 9 ...
##   ..$ WindGust  : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8776] " " " " " " " " ...
##   ..$ Visibility: num [1:8776] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8776] -10 -10 -11 -11 -12 -12 -13 -13 -14 -14 ...
##   ..$ DewC      : int [1:8776] -19 -19 -19 -19 -21 -21 -20 -21 -21 -22 ...
##   ..$ Altimeter : int [1:8776] 3061 3062 3060 3061 3060 3059 3058 3058 3056 3054 ...
##   ..$ SLP       : int [1:8776] 380 383 378 378 378 374 371 368 363 357 ...
##   ..$ FahrC     : chr [1:8776] "T11001194" "T11001194" "T11061189" "T11111194" ...
##   ..$ dtime     : POSIXct[1:8776], format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8776] "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ TempF     : num [1:8776] 14 14 12.9 12 10.9 ...
##   ..$ DewF      : num [1:8776] -2.92 -2.92 -2.02 -2.92 -5.08 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8776 obs. of  16 variables:
##   ..$ METAR     : chr [1:8776] "51Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "51Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "51Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189" "51Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ WindDir   : chr [1:8776] "300" "300" "300" "280" ...
##   ..$ WindSpeed : int [1:8776] 7 10 7 10 10 8 8 9 10 9 ...
##   ..$ WindGust  : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8776] " " " " " " " " ...
##   ..$ Visibility: num [1:8776] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8776] -10 -10 -11 -11 -12 -12 -13 -13 -14 -14 ...
##   ..$ DewC      : int [1:8776] -19 -19 -19 -19 -21 -21 -20 -21 -21 -22 ...
##   ..$ Altimeter : int [1:8776] 3061 3062 3060 3061 3060 3059 3058 3058 3056 3054 ...
##   ..$ SLP       : int [1:8776] 380 383 378 378 378 374 371 368 363 357 ...
##   ..$ FahrC     : chr [1:8776] "T11001194" "T11001194" "T11061189" "T11111194" ...
##   ..$ dtime     : POSIXct[1:8776], format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8776] "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##   ..$ TempF     : num [1:8776] 14 14 12.9 12 10.9 ...
##   ..$ DewF      : num [1:8776] -2.92 -2.92 -2.02 -2.92 -5.08 ...
##   ..$ modSLP    : num [1:8776] 1038 1038 1038 1038 1038 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8776] 0 0 0 0 0 1 1 1 1 1 ...
##   .. ..$ isVV  : num [1:8776] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8776] 0 1 0 0 1 0 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8776] 1 0 1 1 1 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8776] 0 0 0 1 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8776] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ mtxCloud : chr [1:8776, 1:12] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8776 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8776] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowBKN: num [1:8776] NA NA NA 25000 NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8776] 25000 NA 25000 19000 25000 NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8776] NA 25000 NA NA 19000 NA NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8776] 999999 999999 999999 25000 999999 ...
##   ..$ minCloudLevel  : num [1:8776] 25000 25000 25000 19000 19000 ...

Create the base METAR file for Chicago, IL 2017 data:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_kord_2017.txt"  # file name for raw METAR data
timeZ <- "51Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2016-12-31 00:51:00", tz="UTC")  # Expected first time read
expDays <- 367  # Expected total days read
locMET <- "Chicago, IL"  # Description of city or location
shortMET <- "KORD METAR (2017)"  # Station code and timing
longMET <- "Chicago, IL O'Hare Hourly METAR (2017)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Chicago, IL (2017)
kord2017METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_character(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12565 obs. of  29 variables:
##  $ station          : chr  "ORD" "ORD" "ORD" "ORD" ...
##  $ valid            : POSIXct, format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##  $ tmpf             : num  30 30.9 32 35.1 36 37.9 37 37.9 39 37.9 ...
##  $ dwpf             : num  19.9 21.9 21.9 21 21 21 21.9 21.9 21.9 21.9 ...
##  $ relh             : num  65.7 68.9 65.9 56 54.1 ...
##  $ drct             : num  180 180 180 190 190 210 200 210 220 220 ...
##  $ sknt             : num  10 15 13 17 15 20 21 12 15 12 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  29.8 29.8 29.8 29.7 29.7 ...
##  $ mslp             : num  1011 1010 1008 1007 1005 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA 22 25 33 30 23 26 22 ...
##  $ skyc1            : chr  "SCT" "FEW" "SCT" "BKN" ...
##  $ skyc2            : chr  "SCT" "SCT" "SCT" "BKN" ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : chr  NA NA NA NA ...
##  $ skyl1            : num  13000 13000 13000 14000 16000 22000 20000 18000 18000 19000 ...
##  $ skyl2            : num  22000 18000 18000 18000 25000 25000 NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA 27 33 32 NA 28 NA ...
##  $ peak_wind_drct   : num  NA NA NA NA 190 200 210 NA 210 NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  20.5 19.4 21.6 24.1 26 ...
##  $ metar            : chr  "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2017-01-13 01:51:00 UTC" "2017-02-13 18:51:00 UTC"
##  [3] "2017-02-13 19:51:00 UTC" "2017-03-01 01:51:00 UTC"
##  [5] "2017-06-19 03:51:00 UTC" "2017-06-19 04:51:00 UTC"
##  [7] "2017-07-10 20:51:00 UTC" "2017-07-12 14:51:00 UTC"
##  [9] "2017-07-18 09:51:00 UTC" "2017-07-20 02:51:00 UTC"
## [11] "2017-08-17 06:51:00 UTC" "2017-08-17 07:51:00 UTC"
## [13] "2017-08-17 08:51:00 UTC" "2017-08-17 09:51:00 UTC"
## [15] "2017-08-31 23:51:00 UTC" "2017-09-30 23:51:00 UTC"
## [17] "2017-10-14 07:51:00 UTC" "2017-10-27 00:51:00 UTC"
## [19] "2017-10-30 09:51:00 UTC" "2017-10-31 23:51:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]         [,2]  [,3] [,4] 
## [1,] "18010KT"    "180" "10" NA   
## [2,] "18015KT"    "180" "15" NA   
## [3,] "18013KT"    "180" "13" NA   
## [4,] "19017G22KT" "190" "17" "G22"
## [5,] "19015G25KT" "190" "15" "G25"
## [6,] "21020G33KT" "210" "20" "G33"
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  464  184  217  275  287  247  226  183  150  161  114   47   56  103  148  140 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  178  238  326  328  307  293  220  234  269  308  292  343  408  328  315  272 
##  320  330  340  350  360  VRB <NA> 
##  240  226  194  192  169  100    6 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  464  460  640  695  752  777  762  703  667  637  483  377  354  269  219  158 
##   18   19   20   21   22   23   24   25   26   27   28   29   31 <NA> 
##  102   79   57   47   23   19   12    5    6    7    3    3    2    6 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##   13   18   35   62   91  121  139  162  155  139   96   74   52   57   49   39 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G42  G43  G45  G46  G48 
##   34   26   11   20   11    9    3    9    1    7    5    2    3    1    1    1 
## <NA> 
## 7342 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 6 x 1
##   metar                                                                         
##   <chr>                                                                         
## 1 KORD 111851Z 10SM FEW008 FEW040 SCT140 BKN250 M02/M21 A3047 RMK AO2 SLP329 FU~
## 2 KORD 092151Z 10SM BKN030 OVC250 16/05 A2991 RMK AO2 SLP128 T01560050          
## 3 KORD 221851Z 10SM FEW030 SCT047 SCT065 BKN150 BKN250 29/20 A2982 RMK AO2 SLP0~
## 4 KORD 251951Z 10SM FEW055 BKN250 27/12 A3018 RMK AO2 SLP214 T02720122          
## 5 KORD 142051Z 10SM FEW034 27/16 A2987 RMK AO2 SLP110 T02670161 58008           
## 6 KORD 161851Z 10SM FEW250 07/M04 A2988 RMK AO2 SLP123 T00671039

## Warning: Removed 6 rows containing non-finite values (stat_count).

## Warning: Removed 2 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    11  8777 
## 
## *** Data Not Matched *** 
##  [1] "KORD 130451Z 30010KT 10SM FEW200 OVC250 M07/M14 A3049"                                         
##  [2] "KORD 111851Z 10SM FEW008 FEW040 SCT140 BKN250 M02/M21 A3047 RMK AO2 SLP329 FU FEW008 T10221211"
##  [3] "KORD 092151Z 10SM BKN030 OVC250 16/05 A2991 RMK AO2 SLP128 T01560050"                          
##  [4] "KORD 221851Z 10SM FEW030 SCT047 SCT065 BKN150 BKN250 29/20 A2982 RMK AO2 SLP091 T02890200"     
##  [5] "KORD 251951Z 10SM FEW055 BKN250 27/12 A3018 RMK AO2 SLP214 T02720122"                          
##  [6] "KORD 142051Z 10SM FEW034 27/16 A2987 RMK AO2 SLP110 T02670161 58008"                           
##  [7] "KORD 281751Z 28005KT 10SM FEW050 21/06 A3019"                                                  
##  [8] "KORD 281851Z VRB05KT 10SM FEW060 21/06 RMK AO2 SLPNO T02060056 $"                              
##  [9] "KORD 281951Z 26010KT 10SM CLR 22/06 RMK AO2 SLPNO ALT 3015 T02170061 $"                        
## [10] "KORD 290451Z 25005KT 10SM FEW090 SCT100 17/08 A3010 RMK AO2 SLPNO T01670078 $"                 
## [11] "KORD 161851Z 10SM FEW250 07/M04 A2988 RMK AO2 SLP123 T00671039"                                
## 
## *** Parsing matrix summary *** 
## [1] 8788   11
##      [,1]                                                                                       
## [1,] "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067"                    
## [2,] "51Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056"                    
## [3,] "51Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056"                     
## [4,] "51Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061"                  
## [5,] "51Z 19015G25KT 10SM FEW160 OVC250 02/M06 A2966 RMK AO2 PK WND 19027/0420 SLP050 T00221061"
## [6,] "51Z 21020G33KT 10SM BKN220 OVC250 03/M06 A2961 RMK AO2 PK WND 20033/0546 SLP032 T00331061"
##      [,2]  [,3] [,4]  [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "180" "10" NA    " "  "10SM" "M01" "M07" "A2984" "SLP112" "T10111067"
## [2,] "180" "15" NA    " "  "10SM" "M01" "M06" "A2979" "SLP096" "T10061056"
## [3,] "180" "13" NA    " "  "10SM" "00"  "M06" "A2975" "SLP081" "T00001056"
## [4,] "190" "17" "G22" " "  "10SM" "02"  "M06" "A2971" "SLP067" "T00171061"
## [5,] "190" "15" "G25" " "  "10SM" "02"  "M06" "A2966" "SLP050" "T00221061"
## [6,] "210" "20" "G33" " "  "10SM" "03"  "M06" "A2961" "SLP032" "T00331061"
## 
## *** Summary of the parsed data *** 
## Observations: 8,788
## Variables: 13
## $ METAR      <chr> "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SL...
## $ WindDir    <chr> "180", "180", "180", "190", "190", "210", "200", "210", ...
## $ WindSpeed  <chr> "10", "15", "13", "17", "15", "20", "21", "12", "15", "1...
## $ WindGust   <chr> NA, NA, NA, "G22", "G25", "G33", "G30", "G23", "G26", "G...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "M01", "M01", "00", "02", "02", "03", "03", "03", "04", ...
## $ DewC       <chr> "M07", "M06", "M06", "M06", "M06", "M06", "M06", "M06", ...
## $ Altimeter  <chr> "A2984", "A2979", "A2975", "A2971", "A2966", "A2961", "A...
## $ SLP        <chr> "SLP112", "SLP096", "SLP081", "SLP067", "SLP050", "SLP03...
## $ FahrC      <chr> "T10111067", "T10061056", "T00001056", "T00171061", "T00...
## $ dtime      <dttm> 2016-12-31 00:51:00, 2016-12-31 01:51:00, 2016-12-31 02...
## $ origMETAR  <chr> "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 R...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8788 obs. of  15 variables:
##  $ METAR     : chr  "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "51Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "51Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056" "51Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ WindDir   : chr  "180" "180" "180" "190" ...
##  $ WindSpeed : int  10 15 13 17 15 20 21 12 15 12 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -1 -1 0 2 2 3 3 3 4 3 ...
##  $ DewC      : int  -7 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##  $ Altimeter : int  2984 2979 2975 2971 2966 2961 2957 2957 2956 2954 ...
##  $ SLP       : int  112 96 81 67 50 32 19 17 15 10 ...
##  $ FahrC     : chr  "T10111067" "T10061056" "T00001056" "T00171061" ...
##  $ dtime     : POSIXct, format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ TempF     : num  30 30.9 32 35.1 36 ...
##  $ DewF      : num  19.9 21.9 21.9 21 21 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 180            10       NA " "           10    -1    -7      2984   112
## 2 51Z ~ 180            15       NA " "           10    -1    -6      2979    96
## 3 51Z ~ 180            13       NA " "           10     0    -6      2975    81
## 4 51Z ~ 190            17       NA " "           10     2    -6      2971    67
## 5 51Z ~ 190            15       NA " "           10     2    -6      2966    50
## 6 51Z ~ 210            20       NA " "           10     3    -6      2961    32
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 51Z ~ 260             9       NA " "           10   -18   -24      3066   403
## 2 51Z ~ 260             9       NA " "           10   -17   -24      3066   400
## 3 51Z ~ 270             9       NA " "           10   -17   -24      3066   402
## 4 51Z ~ 270             9       NA " "           10   -18   -24      3066   405
## 5 51Z ~ 280             8       NA " "           10   -18   -24      3068   411
## 6 51Z ~ 270             7       NA " "           10   -19   -24      3068   411
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 51Z ~ 290             8       NA " "           10    19    12      2986   107
##  2 51Z ~ 090             8       NA " "           10     9    -2      3004   175
##  3 51Z ~ 230            10       NA " "           10    -7   -14      3000   168
##  4 51Z ~ 170            12       NA " "           10    18    12      2946   973
##  5 51Z ~ 240             6       NA " "           10     1    -6      2990   134
##  6 51Z ~ 290             8       NA " "           10    22    12      2996   143
##  7 51Z ~ 050            12       NA " "           10    21    11      3011   195
##  8 51Z ~ 220             9       NA " "            4   -11   -14      3037   298
##  9 51Z ~ 000             0       NA " "            8    14    10      3031   265
## 10 51Z ~ 160             7       NA " "           10    22    16      3000   154
## 11 51Z ~ 160             8       NA " "           10    22    15      3004   167
## 12 51Z ~ 050            10       NA " "           10     8     6      3014   211
## 13 51Z ~ 310            12       NA " "           10    -2    -8      3011   204
## 14 51Z ~ 180            13       NA " "           10    21    12      3018   216
## 15 51Z ~ 230             5       NA " "           10     2    -4      2987   122
## 16 51Z ~ 010             9       NA " "           10    14     8      3004   169
## 17 51Z ~ 160             3       NA " "           10     8     4      3017   218
## 18 51Z ~ 300             7       NA " "           10   -16   -21      3069   408
## 19 51Z ~ 030             3       NA " "           10    -6   -12      3024   250
## 20 51Z ~ 040            16       NA " "           10    21    13      2987   110
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         11         11         11       8788         11         11         11 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         11         11         11         11          0          0         11 
##       DewF 
##         11

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 19 x 2
##    Visibility     n
##         <dbl> <int>
##  1      0.125     1
##  2      0.25     20
##  3      0.5      33
##  4      0.75     20
##  5      1        29
##  6      1.25     16
##  7      1.5      44
##  8      1.75     30
##  9      2        55
## 10      2.5      59
## 11      3       111
## 12      4       123
## 13      5       163
## 14      6       154
## 15      7       137
## 16      8       207
## 17      9       228
## 18     10      7347
## 19     NA        11
##    WindGust    n
## 1        14   13
## 2        15   18
## 3        16   35
## 4        17   62
## 5        18   91
## 6        19  121
## 7        20  139
## 8        21  162
## 9        22  155
## 10       23  139
## 11       24   96
## 12       25   74
## 13       26   52
## 14       27   57
## 15       28   49
## 16       29   39
## 17       30   34
## 18       31   26
## 19       32   11
## 20       33   20
## 21       34   11
## 22       35    9
## 23       36    3
## 24       37    9
## 25       38    1
## 26       39    7
## 27       40    5
## 28       42    2
## 29       43    3
## 30       45    1
## 31       46    1
## 32       48    1
## 33       NA 7342
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8777 complete cases (99.9% of 8788 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.91  0.91     -0.32  -0.37     -0.04       0.16
## TempF       1.00  1.00  0.91  0.91     -0.32  -0.37     -0.04       0.16
## DewC        0.91  0.91  1.00  1.00     -0.40  -0.46     -0.12      -0.02
## DewF        0.91  0.91  1.00  1.00     -0.40  -0.46     -0.12      -0.02
## Altimeter  -0.32 -0.32 -0.40 -0.40      1.00   1.00     -0.23       0.17
## modSLP     -0.37 -0.37 -0.46 -0.46      1.00   1.00     -0.22       0.16
## WindSpeed  -0.04 -0.04 -0.12 -0.12     -0.23  -0.22      1.00       0.06
## Visibility  0.16  0.16 -0.02 -0.02      0.17   0.16      0.06       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.18201 -0.44599 -0.02589  0.39396  1.50588 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.798e+01  7.155e-01  -39.11   <2e-16 ***
## Altimeter    3.480e-01  2.385e-04 1459.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5121 on 8775 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.9959, Adjusted R-squared:  0.9959 
## F-statistic: 2.129e+06 on 1 and 8775 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70814 -0.12671  0.00102  0.12754  0.63138 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.633e+00  2.667e-01  -21.12   <2e-16 ***
## Altimeter    3.410e-01  8.832e-05 3860.89   <2e-16 ***
## TempF       -2.549e-02  1.021e-04 -249.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1798 on 8774 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.9995, Adjusted R-squared:  0.9995 
## F-statistic: 8.664e+06 on 2 and 8774 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      1 1454
## 2      0    0      0      0      1      0  457
## 3      0    0      0      0      1      1  624
## 4      0    0      0      0      2      0  212
## 5      0    0      0      0      2      1  136
## 6      0    0      0      0      3      0   62
## 7      0    0      0      0      3      1    3
## 8      0    0      0      0      4      0    3
## 9      0    0      0      1      0      0  377
## 10     0    0      0      1      0      1  174
## 11     0    0      0      1      1      0  231
## 12     0    0      0      1      1      1  200
## 13     0    0      0      1      2      0  121
## 14     0    0      0      1      2      1   10
## 15     0    0      0      1      3      0   19
## 16     0    0      0      1      3      1    1
## 17     0    0      0      1      4      0    1
## 18     0    0      0      2      0      0  140
## 19     0    0      0      2      0      1   23
## 20     0    0      0      2      1      0   60
## 21     0    0      0      2      1      1   13
## 22     0    0      0      2      2      0   18
## 23     0    0      0      3      0      0   11
## 24     0    0      0      3      1      0    2
## 25     0    0      0      4      0      0    1
## 26     0    0      1      0      0      0 1058
## 27     0    0      1      0      0      1  238
## 28     0    0      1      0      1      0  360
## 29     0    0      1      0      1      1  220
## 30     0    0      1      0      2      0  124
## 31     0    0      1      0      2      1   23
## 32     0    0      1      0      3      0   19
## 33     0    0      1      1      0      0  302
## 34     0    0      1      1      0      1  104
## 35     0    0      1      1      1      0  221
## 36     0    0      1      1      1      1   55
## 37     0    0      1      1      2      0   52
## 38     0    0      1      1      3      0    4
## 39     0    0      1      2      0      0   65
## 40     0    0      1      2      0      1    8
## 41     0    0      1      2      1      0   29
## 42     0    0      1      2      1      1    1
## 43     0    0      1      2      2      0    5
## 44     0    0      1      3      0      0    2
## 45     0    0      2      0      0      0  424
## 46     0    0      2      0      0      1   40
## 47     0    0      2      0      1      0   89
## 48     0    0      2      0      1      1    9
## 49     0    0      2      0      2      0   15
## 50     0    0      2      1      0      0  100
## 51     0    0      2      1      0      1    6
## 52     0    0      2      1      1      0   20
## 53     0    0      2      2      0      0    8
## 54     0    0      2      2      0      1    2
## 55     0    0      2      2      1      0    3
## 56     0    0      3      0      0      0   27
## 57     0    0      3      1      0      0    1
## 58     0    0      3      1      1      0    2
## 59     0    0      4      0      0      0    2
## 60     0    1      0      0      0      0   31
## 61     1    0      0      0      0      0  766
## 
## *** METAR records where no clouds were extracted ***
## character(0)

## 
## *** Dimensions for the cloud matrix ***
## [1] 8788   15
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8788 obs. of  7 variables:
##   ..$ isCLR : num [1:8788] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV  : num [1:8788] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8788] 0 1 0 0 1 0 0 0 0 0 ...
##   ..$ numSCT: int [1:8788] 2 1 2 0 0 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8788] 0 0 0 2 0 1 0 0 0 0 ...
##   ..$ numOVC: int [1:8788] 0 0 0 0 1 1 1 1 1 1 ...
##  $ mtxCloud : chr [1:8788, 1:15] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,788 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA     NA     NA  13000     NA
##  2    NA     NA     NA  18000  13000
##  3    NA     NA     NA  13000     NA
##  4    NA     NA  14000     NA     NA
##  5    NA  25000     NA     NA  16000
##  6    NA  25000  22000     NA     NA
##  7    NA  20000     NA     NA     NA
##  8    NA  18000     NA     NA     NA
##  9    NA  18000     NA     NA     NA
## 10    NA  19000     NA     NA     NA
## # ... with 8,778 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8788 obs. of  5 variables:
##   ..$ lowVV : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8788] NA NA NA NA 25000 25000 20000 18000 18000 19000 ...
##   ..$ lowBKN: num [1:8788] NA NA NA 14000 NA 22000 NA NA NA NA ...
##   ..$ lowSCT: num [1:8788] 13000 18000 13000 NA NA NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8788] NA 13000 NA NA 16000 NA NA NA NA NA ...
##  $ minCeilingLevel: num [1:8788] 999999 999999 999999 14000 25000 ...
##  $ minCloudLevel  : num [1:8788] 13000 13000 13000 14000 16000 22000 20000 18000 18000 19000 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8788 obs. of  30 variables:
##  $ METAR     : chr  "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "51Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "51Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056" "51Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ WindDir   : chr  "180" "180" "180" "190" ...
##  $ WindSpeed : int  10 15 13 17 15 20 21 12 15 12 ...
##  $ WindGust  : num  NA NA NA 22 25 33 30 23 26 22 ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  -1 -1 0 2 2 3 3 3 4 3 ...
##  $ DewC      : int  -7 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##  $ Altimeter : int  2984 2979 2975 2971 2966 2961 2957 2957 2956 2954 ...
##  $ SLP       : int  112 96 81 67 50 32 19 17 15 10 ...
##  $ FahrC     : chr  "T10111067" "T10061056" "T00001056" "T00171061" ...
##  $ dtime     : POSIXct, format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##  $ origMETAR : chr  "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ TempF     : num  30 30.9 32 35.1 36 ...
##  $ DewF      : num  19.9 21.9 21.9 21 21 ...
##  $ modSLP    : num  1011 1010 1008 1007 1005 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 1 0 0 1 0 0 0 0 0 ...
##  $ numSCT    : int  2 1 2 0 0 0 0 0 0 0 ...
##  $ numBKN    : int  0 0 0 2 0 1 0 0 0 0 ...
##  $ numOVC    : int  0 0 0 0 1 1 1 1 1 1 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  NA NA NA NA 25000 25000 20000 18000 18000 19000 ...
##  $ lowBKN    : num  NA NA NA 14000 NA 22000 NA NA NA NA ...
##  $ lowSCT    : num  13000 18000 13000 NA NA NA NA NA NA NA ...
##  $ lowFEW    : num  NA 13000 NA NA 16000 NA NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 4 4 4 3 2 2 2 2 2 2 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(kord2017METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8788 obs. of  30 variables:
##   ..$ METAR     : chr [1:8788] "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "51Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "51Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056" "51Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ WindDir   : chr [1:8788] "180" "180" "180" "190" ...
##   ..$ WindSpeed : int [1:8788] 10 15 13 17 15 20 21 12 15 12 ...
##   ..$ WindGust  : num [1:8788] NA NA NA 22 25 33 30 23 26 22 ...
##   ..$ Dummy     : chr [1:8788] " " " " " " " " ...
##   ..$ Visibility: num [1:8788] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8788] -1 -1 0 2 2 3 3 3 4 3 ...
##   ..$ DewC      : int [1:8788] -7 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8788] 2984 2979 2975 2971 2966 2961 2957 2957 2956 2954 ...
##   ..$ SLP       : int [1:8788] 112 96 81 67 50 32 19 17 15 10 ...
##   ..$ FahrC     : chr [1:8788] "T10111067" "T10061056" "T00001056" "T00171061" ...
##   ..$ dtime     : POSIXct[1:8788], format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8788] "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ TempF     : num [1:8788] 30 30.9 32 35.1 36 ...
##   ..$ DewF      : num [1:8788] 19.9 21.9 21.9 21 21 ...
##   ..$ modSLP    : num [1:8788] 1011 1010 1008 1007 1005 ...
##   ..$ isCLR     : num [1:8788] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV      : num [1:8788] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8788] 0 1 0 0 1 0 0 0 0 0 ...
##   ..$ numSCT    : int [1:8788] 2 1 2 0 0 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8788] 0 0 0 2 0 1 0 0 0 0 ...
##   ..$ numOVC    : int [1:8788] 0 0 0 0 1 1 1 1 1 1 ...
##   ..$ lowVV     : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8788] NA NA NA NA 25000 25000 20000 18000 18000 19000 ...
##   ..$ lowBKN    : num [1:8788] NA NA NA 14000 NA 22000 NA NA NA NA ...
##   ..$ lowSCT    : num [1:8788] 13000 18000 13000 NA NA NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8788] NA 13000 NA NA 16000 NA NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 4 4 4 3 2 2 2 2 2 2 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8788 obs. of  29 variables:
##   ..$ station          : chr [1:8788] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8788], format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8788] 30 30.9 32 35.1 36 37.9 37 37.9 39 37.9 ...
##   ..$ dwpf             : num [1:8788] 19.9 21.9 21.9 21 21 21 21.9 21.9 21.9 21.9 ...
##   ..$ relh             : num [1:8788] 65.7 68.9 65.9 56 54.1 ...
##   ..$ drct             : num [1:8788] 180 180 180 190 190 210 200 210 220 220 ...
##   ..$ sknt             : num [1:8788] 10 15 13 17 15 20 21 12 15 12 ...
##   ..$ p01i             : chr [1:8788] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8788] 29.8 29.8 29.8 29.7 29.7 ...
##   ..$ mslp             : num [1:8788] 1011 1010 1008 1007 1005 ...
##   ..$ vsby             : num [1:8788] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8788] NA NA NA 22 25 33 30 23 26 22 ...
##   ..$ skyc1            : chr [1:8788] "SCT" "FEW" "SCT" "BKN" ...
##   ..$ skyc2            : chr [1:8788] "SCT" "SCT" "SCT" "BKN" ...
##   ..$ skyc3            : chr [1:8788] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8788] NA NA NA NA ...
##   ..$ skyl1            : num [1:8788] 13000 13000 13000 14000 16000 22000 20000 18000 18000 19000 ...
##   ..$ skyl2            : num [1:8788] 22000 18000 18000 18000 25000 25000 NA NA NA NA ...
##   ..$ skyl3            : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8788] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8788] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8788] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8788] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8788] NA NA NA NA 27 33 32 NA 28 NA ...
##   ..$ peak_wind_drct   : num [1:8788] NA NA NA NA 190 200 210 NA 210 NA ...
##   ..$ peak_wind_time   : POSIXct[1:8788], format: NA NA ...
##   ..$ feel             : num [1:8788] 20.5 19.4 21.6 24.1 26 ...
##   ..$ metar            : chr [1:8788] "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_character(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_double(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_character(),
##   .. ..   ice_accretion_3hr = col_character(),
##   .. ..   ice_accretion_6hr = col_character(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8788 obs. of  32 variables:
##   ..$ station          : chr [1:8788] "ORD" "ORD" "ORD" "ORD" ...
##   ..$ valid            : POSIXct[1:8788], format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##   ..$ tmpf             : num [1:8788] 30 30.9 32 35.1 36 37.9 37 37.9 39 37.9 ...
##   ..$ dwpf             : num [1:8788] 19.9 21.9 21.9 21 21 21 21.9 21.9 21.9 21.9 ...
##   ..$ relh             : num [1:8788] 65.7 68.9 65.9 56 54.1 ...
##   ..$ drct             : num [1:8788] 180 180 180 190 190 210 200 210 220 220 ...
##   ..$ sknt             : num [1:8788] 10 15 13 17 15 20 21 12 15 12 ...
##   ..$ p01i             : chr [1:8788] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8788] 29.8 29.8 29.8 29.7 29.7 ...
##   ..$ mslp             : num [1:8788] 1011 1010 1008 1007 1005 ...
##   ..$ vsby             : num [1:8788] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8788] NA NA NA 22 25 33 30 23 26 22 ...
##   ..$ skyc1            : chr [1:8788] "SCT" "FEW" "SCT" "BKN" ...
##   ..$ skyc2            : chr [1:8788] "SCT" "SCT" "SCT" "BKN" ...
##   ..$ skyc3            : chr [1:8788] NA NA NA NA ...
##   ..$ skyc4            : chr [1:8788] NA NA NA NA ...
##   ..$ skyl1            : num [1:8788] 13000 13000 13000 14000 16000 22000 20000 18000 18000 19000 ...
##   ..$ skyl2            : num [1:8788] 22000 18000 18000 18000 25000 25000 NA NA NA NA ...
##   ..$ skyl3            : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8788] NA NA NA NA ...
##   ..$ ice_accretion_1hr: chr [1:8788] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8788] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8788] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8788] NA NA NA NA 27 33 32 NA 28 NA ...
##   ..$ peak_wind_drct   : num [1:8788] NA NA NA NA 190 200 210 NA 210 NA ...
##   ..$ peak_wind_time   : POSIXct[1:8788], format: NA NA ...
##   ..$ feel             : num [1:8788] 20.5 19.4 21.6 24.1 26 ...
##   ..$ metar            : chr [1:8788] "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ dirW             : chr [1:8788] "180" "180" "180" "190" ...
##   ..$ spdW             : num [1:8788] 10 15 13 17 15 20 21 12 15 12 ...
##   ..$ gustW            : num [1:8788] NA NA NA 22 25 33 30 23 26 22 ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8788 obs. of  13 variables:
##   ..$ METAR     : chr [1:8788] "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "51Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "51Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056" "51Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ WindDir   : chr [1:8788] "180" "180" "180" "190" ...
##   ..$ WindSpeed : chr [1:8788] "10" "15" "13" "17" ...
##   ..$ WindGust  : chr [1:8788] NA NA NA "G22" ...
##   ..$ Dummy     : chr [1:8788] " " " " " " " " ...
##   ..$ Visibility: chr [1:8788] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8788] "M01" "M01" "00" "02" ...
##   ..$ DewC      : chr [1:8788] "M07" "M06" "M06" "M06" ...
##   ..$ Altimeter : chr [1:8788] "A2984" "A2979" "A2975" "A2971" ...
##   ..$ SLP       : chr [1:8788] "SLP112" "SLP096" "SLP081" "SLP067" ...
##   ..$ FahrC     : chr [1:8788] "T10111067" "T10061056" "T00001056" "T00171061" ...
##   ..$ dtime     : POSIXct[1:8788], format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8788] "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8788 obs. of  15 variables:
##   ..$ METAR     : chr [1:8788] "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "51Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "51Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056" "51Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ WindDir   : chr [1:8788] "180" "180" "180" "190" ...
##   ..$ WindSpeed : int [1:8788] 10 15 13 17 15 20 21 12 15 12 ...
##   ..$ WindGust  : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8788] " " " " " " " " ...
##   ..$ Visibility: num [1:8788] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8788] -1 -1 0 2 2 3 3 3 4 3 ...
##   ..$ DewC      : int [1:8788] -7 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8788] 2984 2979 2975 2971 2966 2961 2957 2957 2956 2954 ...
##   ..$ SLP       : int [1:8788] 112 96 81 67 50 32 19 17 15 10 ...
##   ..$ FahrC     : chr [1:8788] "T10111067" "T10061056" "T00001056" "T00171061" ...
##   ..$ dtime     : POSIXct[1:8788], format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8788] "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ TempF     : num [1:8788] 30 30.9 32 35.1 36 ...
##   ..$ DewF      : num [1:8788] 19.9 21.9 21.9 21 21 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8788 obs. of  16 variables:
##   ..$ METAR     : chr [1:8788] "51Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "51Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "51Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056" "51Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ WindDir   : chr [1:8788] "180" "180" "180" "190" ...
##   ..$ WindSpeed : int [1:8788] 10 15 13 17 15 20 21 12 15 12 ...
##   ..$ WindGust  : num [1:8788] NA NA NA 22 25 33 30 23 26 22 ...
##   ..$ Dummy     : chr [1:8788] " " " " " " " " ...
##   ..$ Visibility: num [1:8788] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8788] -1 -1 0 2 2 3 3 3 4 3 ...
##   ..$ DewC      : int [1:8788] -7 -6 -6 -6 -6 -6 -6 -6 -6 -6 ...
##   ..$ Altimeter : int [1:8788] 2984 2979 2975 2971 2966 2961 2957 2957 2956 2954 ...
##   ..$ SLP       : int [1:8788] 112 96 81 67 50 32 19 17 15 10 ...
##   ..$ FahrC     : chr [1:8788] "T10111067" "T10061056" "T00001056" "T00171061" ...
##   ..$ dtime     : POSIXct[1:8788], format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##   ..$ origMETAR : chr [1:8788] "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##   ..$ TempF     : num [1:8788] 30 30.9 32 35.1 36 ...
##   ..$ DewF      : num [1:8788] 19.9 21.9 21.9 21 21 ...
##   ..$ modSLP    : num [1:8788] 1011 1010 1008 1007 1005 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8788 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8788] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ isVV  : num [1:8788] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8788] 0 1 0 0 1 0 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8788] 2 1 2 0 0 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8788] 0 0 0 2 0 1 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8788] 0 0 0 0 1 1 1 1 1 1 ...
##   ..$ mtxCloud : chr [1:8788, 1:15] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8788 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8788] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8788] NA NA NA NA 25000 25000 20000 18000 18000 19000 ...
##   .. ..$ lowBKN: num [1:8788] NA NA NA 14000 NA 22000 NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8788] 13000 18000 13000 NA NA NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8788] NA 13000 NA NA 16000 NA NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8788] 999999 999999 999999 14000 25000 ...
##   ..$ minCloudLevel  : num [1:8788] 13000 13000 13000 14000 16000 22000 20000 18000 18000 19000 ...

Create the base METAR file for Minneapolis, MN 2016 data:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_kmsp_2016.txt"  # file name for raw METAR data
timeZ <- "53Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:53:00", tz="UTC")  # Expected first time read
expDays <- 368  # Expected total days read
locMET <- "Minneapolis, MN"  # Description of city or location
shortMET <- "KMSP METAR (2016)"  # Station code and timing
longMET <- "Minneapolis, MN Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Minneapolis, MN (2016)
kmsp2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_character(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_character(),
##   ice_accretion_3hr = col_character(),
##   ice_accretion_6hr = col_character(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 11011 obs. of  29 variables:
##  $ station          : chr  "MSP" "MSP" "MSP" "MSP" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ tmpf             : num  23 21.9 21 19.9 19 ...
##  $ dwpf             : num  17.1 18 16 14 14 ...
##  $ relh             : num  77.6 84.5 80.5 77.3 80.4 ...
##  $ drct             : num  270 230 250 250 270 260 230 240 220 230 ...
##  $ sknt             : num  8 11 7 9 8 7 4 3 3 5 ...
##  $ p01i             : chr  "T" "T" "T" "T" ...
##  $ alti             : num  30.2 30.2 30.2 30.2 30.2 ...
##  $ mslp             : num  1024 1024 1024 NA 1024 ...
##  $ vsby             : num  4 5 9 9 10 10 9 8 8 9 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "OVC" "FEW" "OVC" "SCT" ...
##  $ skyc2            : chr  NA "BKN" NA "SCT" ...
##  $ skyc3            : chr  NA "OVC" NA "BKN" ...
##  $ skyc4            : chr  NA NA NA NA ...
##  $ skyl1            : num  1800 1200 1900 1600 1600 1600 12000 2600 2600 2100 ...
##  $ skyl2            : num  NA 1700 NA 4100 4100 3900 NA NA NA NA ...
##  $ skyl3            : num  NA 7000 NA 10000 NA 12000 NA NA NA NA ...
##  $ skyl4            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wxcodes          : chr  "-SN" "-SN BR" "-SN" NA ...
##  $ ice_accretion_1hr: chr  NA NA NA NA ...
##  $ ice_accretion_3hr: chr  NA NA NA NA ...
##  $ ice_accretion_6hr: chr  NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  13.03 9.73 11.4 8.52 8.15 ...
##  $ metar            : chr  "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310341Z 25009KT 9SM SCT016 SCT041 BKN100 M07/M10 A3019 RMK AO2 SNE40 P0000 T10671100" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-05-06 11:53:00 UTC" "2016-05-06 12:53:00 UTC"
##  [3] "2016-06-17 23:53:00 UTC" "2016-06-18 00:53:00 UTC"
##  [5] "2016-07-13 14:53:00 UTC" "2016-07-13 15:53:00 UTC"
##  [7] "2016-07-13 16:53:00 UTC" "2016-07-13 17:53:00 UTC"
##  [9] "2016-08-05 07:53:00 UTC" "2016-10-05 10:53:00 UTC"
## [11] "2016-10-18 03:53:00 UTC" "2016-11-21 00:53:00 UTC"
## [13] "2016-12-03 06:53:00 UTC" "2016-12-03 09:53:00 UTC"
## [15] "2016-12-11 18:53:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "27008KT" "270" "08" NA  
## [2,] "23011KT" "230" "11" NA  
## [3,] "25007KT" "250" "07" NA  
## [4,] "27008KT" "270" "08" NA  
## [5,] "26007KT" "260" "07" NA  
## [6,] "23004KT" "230" "04" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  538  209  153  143   96   99   96   96  126  113  136  171  256  416  467  391 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  310  292  239  198  217  192  230  234  197  208  197  229  230  266  298  364 
##  320  330  340  350  360  VRB <NA> 
##  339  287  183  185  195  215    6 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  538  605  699  786  824  814  734  766  625  521  467  368  290  209  167  126 
##   18   19   20   21   22   23   24   25   26   27   28   29   31 <NA> 
##   93   55   39   27   21   15    8    3    3    4    1    2    1    6 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##    7   23   51   90   97  105  157  111  134  111   86   73   61   45   41   23 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G45  G51 <NA> 
##   19   19   14    9    9    8    5    5    5    1    1    1    1 7505 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 6 x 1
##   metar                                                                      
##   <chr>                                                                      
## 1 KMSP 022153Z 10SM FEW070 18/M01 A3001 RMK AO2 SLP163 T01831011             
## 2 KMSP 211653Z 10SM FEW080 24/04 A3020 RMK AO2 SLP226 T02390039              
## 3 KMSP 301553Z 10SM SCT150 BKN250 25/13 A3000 RMK AO2 SLP154 T02500128 $     
## 4 KMSP 012053Z 10SM SCT250 22/07 A3015 RMK AO2 SLP210 T02220067 56012        
## 5 KMSP 131553Z 10SM FEW027 FEW250 17/09 A3020 RMK AO2 PRESRR SLP224 T01670089
## 6 KMSP 011653Z 10SM FEW025 M02/M08 A3003 RMK AO2 SLP179 T10171078

## Warning: Removed 6 rows containing non-finite values (stat_count).

## Warning: Removed 1 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##     7  8810 
## 
## *** Data Not Matched *** 
## [1] "KMSP 022153Z 10SM FEW070 18/M01 A3001 RMK AO2 SLP163 T01831011"             
## [2] "KMSP 211653Z 10SM FEW080 24/04 A3020 RMK AO2 SLP226 T02390039"              
## [3] "KMSP 301553Z 10SM SCT150 BKN250 25/13 A3000 RMK AO2 SLP154 T02500128 $"     
## [4] "KMSP 012053Z 10SM SCT250 22/07 A3015 RMK AO2 SLP210 T02220067 56012"        
## [5] "KMSP 131553Z 10SM FEW027 FEW250 17/09 A3020 RMK AO2 PRESRR SLP224 T01670089"
## [6] "KMSP 071453Z 26015G20KT 3SM -SN FEW019 SCT026 OVC032 M08/M13 A3001"         
## [7] "KMSP 011653Z 10SM FEW025 M02/M08 A3003 RMK AO2 SLP179 T10171078"            
## 
## *** Parsing matrix summary *** 
## [1] 8817   11
##      [,1]                                                                                      
## [1,] "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083"           
## [2,] "53Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078"
## [3,] "53Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089"           
## [4,] "53Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100"       
## [5,] "53Z 26007KT 10SM FEW016 FEW039 SCT120 M08/M11 A3020 RMK AO2 SLP241 T10781111"            
## [6,] "53Z 23004KT 9SM FEW120 M09/M11 A3020 RMK AO2 SLP242 4/005 60000 T10891111"               
##      [,2]  [,3] [,4] [,5] [,6]   [,7]  [,8]  [,9]    [,10]    [,11]      
## [1,] "270" "08" NA   " "  "4SM"  "M05" "M08" "A3019" "SLP237" "T10501083"
## [2,] "230" "11" NA   " "  "5SM"  "M06" "M08" "A3019" "SLP237" "T10561078"
## [3,] "250" "07" NA   " "  "9SM"  "M06" "M09" "A3019" "SLP239" "T10611089"
## [4,] "270" "08" NA   " "  "10SM" "M07" "M10" "A3019" "SLP238" "T10721100"
## [5,] "260" "07" NA   " "  "10SM" "M08" "M11" "A3020" "SLP241" "T10781111"
## [6,] "230" "04" NA   " "  "9SM"  "M09" "M11" "A3020" "SLP242" "T10891111"
## 
## *** Summary of the parsed data *** 
## Observations: 8,817
## Variables: 13
## $ METAR      <chr> "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 ...
## $ WindDir    <chr> "270", "230", "250", "270", "260", "230", "220", "230", ...
## $ WindSpeed  <chr> "08", "11", "07", "08", "07", "04", "03", "05", "06", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "4SM", "5SM", "9SM", "10SM", "10SM", "9SM", "8SM", "9SM"...
## $ TempC      <chr> "M05", "M06", "M06", "M07", "M08", "M09", "M08", "M08", ...
## $ DewC       <chr> "M08", "M08", "M09", "M10", "M11", "M11", "M11", "M10", ...
## $ Altimeter  <chr> "A3019", "A3019", "A3019", "A3019", "A3020", "A3020", "A...
## $ SLP        <chr> "SLP237", "SLP237", "SLP239", "SLP238", "SLP241", "SLP24...
## $ FahrC      <chr> "T10501083", "T10561078", "T10611089", "T10721100", "T10...
## $ dtime      <dttm> 2015-12-31 00:53:00, 2015-12-31 01:53:00, 2015-12-31 02...
## $ origMETAR  <chr> "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK A...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8817 obs. of  15 variables:
##  $ METAR     : chr  "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "53Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "53Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089" "53Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ WindDir   : chr  "270" "230" "250" "270" ...
##  $ WindSpeed : int  8 11 7 8 7 4 3 5 6 6 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  4 5 9 10 10 9 8 9 9 9 ...
##  $ TempC     : int  -5 -6 -6 -7 -8 -9 -8 -8 -8 -8 ...
##  $ DewC      : int  -8 -8 -9 -10 -11 -11 -11 -10 -10 -11 ...
##  $ Altimeter : int  3019 3019 3019 3019 3020 3020 3019 3019 3019 3019 ...
##  $ SLP       : int  237 237 239 238 241 242 237 236 237 239 ...
##  $ FahrC     : chr  "T10501083" "T10561078" "T10611089" "T10721100" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ origMETAR : chr  "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ TempF     : num  23 21.9 21 19 18 ...
##  $ DewF      : num  17.1 18 16 14 12 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 53Z ~ 270             8       NA " "            4    -5    -8      3019   237
## 2 53Z ~ 230            11       NA " "            5    -6    -8      3019   237
## 3 53Z ~ 250             7       NA " "            9    -6    -9      3019   239
## 4 53Z ~ 270             8       NA " "           10    -7   -10      3019   238
## 5 53Z ~ 260             7       NA " "           10    -8   -11      3020   241
## 6 53Z ~ 230             4       NA " "            9    -9   -11      3020   242
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 53Z ~ 040             6       NA " "           10     0    -7      3001   174
## 2 53Z ~ 000             0       NA " "           10    -1    -7      3002   176
## 3 53Z ~ 090             6       NA " "           10    -1    -7      3003   182
## 4 53Z ~ 070             8       NA " "           10    -1    -7      3005   187
## 5 53Z ~ 090             6       NA " "           10    -2    -7      3007   194
## 6 53Z ~ 080             7       NA " "           10    -2    -7      3009   201
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 53Z ~ 300            10       NA " "           10    22    19      2977    74
##  2 53Z ~ 220            13       NA " "           10    33     8      2965    35
##  3 53Z ~ 260            15       NA " "           10    -8   -13      2987   128
##  4 53Z ~ 080             9       NA " "            4    13    11      2961    27
##  5 53Z ~ 280            17       NA " "           10   -15   -23      3017   238
##  6 53Z ~ 010            11       NA " "           10    15    10      3001   159
##  7 53Z ~ 180            11       NA " "           10    24    16      2994   133
##  8 53Z ~ 230             6       NA " "           10    -5   -11      3008   200
##  9 53Z ~ 290             6       NA " "           10    17    13      2992   128
## 10 53Z ~ 120            17       NA " "           10    18    17      2990   123
## 11 53Z ~ 110             8       NA " "           10    18     9      3020   227
## 12 53Z ~ 150             4       NA " "           10    19    -2      3015   213
## 13 53Z ~ 230             6       NA " "           10    -6    -9      3023   249
## 14 53Z ~ 030             5       NA " "           10    20    12      3006   180
## 15 53Z ~ 260             5       NA " "            9   -21   -25      3040   317
## 16 53Z ~ 280             3       NA " "           10    13    11      2978    83
## 17 53Z ~ 000             0       NA " "           10     6     3      3012   201
## 18 53Z ~ 010             9       NA " "            3     0    -2      2980   101
## 19 53Z ~ 210             6       NA " "           10     1    -6      3016   224
## 20 53Z ~ 250             5       NA " "           10    19    14      2978    83
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##          7          7          7       8817          7          7          7 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##          7          7          7          7          0          0          7 
##       DewF 
##          7

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 19 x 2
##    Visibility     n
##         <dbl> <int>
##  1      0.125     3
##  2      0.25     10
##  3      0.5       8
##  4      0.75     20
##  5      1        17
##  6      1.25     16
##  7      1.5      25
##  8      1.75     25
##  9      2        42
## 10      2.5      38
## 11      3        93
## 12      4       126
## 13      5       153
## 14      6       169
## 15      7       223
## 16      8       223
## 17      9       346
## 18     10      7273
## 19     NA         7
##    WindGust    n
## 1        14    6
## 2        15   20
## 3        16   48
## 4        17   90
## 5        18   97
## 6        19  105
## 7        20  157
## 8        21  111
## 9        22  134
## 10       23  111
## 11       24   86
## 12       25   73
## 13       26   61
## 14       27   45
## 15       28   41
## 16       29   23
## 17       30   19
## 18       31   19
## 19       32   14
## 20       33    9
## 21       34    9
## 22       35    8
## 23       36    5
## 24       37    5
## 25       38    5
## 26       39    1
## 27       40    1
## 28       45    1
## 29       51    1
## 30       NA 7512
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8810 complete cases (99.9% of 8817 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.94  0.94     -0.23  -0.32     -0.02       0.23
## TempF       1.00  1.00  0.94  0.94     -0.23  -0.32     -0.02       0.23
## DewC        0.94  0.94  1.00  1.00     -0.28  -0.37     -0.09       0.11
## DewF        0.94  0.94  1.00  1.00     -0.28  -0.37     -0.09       0.11
## Altimeter  -0.23 -0.23 -0.28 -0.28      1.00   0.99     -0.25       0.13
## modSLP     -0.32 -0.32 -0.37 -0.37      0.99   1.00     -0.24       0.10
## WindSpeed  -0.02 -0.02 -0.09 -0.09     -0.25  -0.24      1.00       0.00
## Visibility  0.23  0.23  0.11  0.11      0.13   0.10      0.00       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3631 -0.6539 -0.1521  0.5753  2.4126 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.112e+01  1.136e+00   -27.4   <2e-16 ***
## Altimeter    3.491e-01  3.787e-04   921.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7761 on 8808 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.9897, Adjusted R-squared:  0.9897 
## F-statistic: 8.499e+05 on 1 and 8808 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80927 -0.14933 -0.00409  0.14322  0.79251 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.2404484  0.3262621  -16.06   <2e-16 ***
## Altimeter    0.3410710  0.0001084 3147.07   <2e-16 ***
## TempF       -0.0342268  0.0001058 -323.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2161 on 8807 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.9992, Adjusted R-squared:  0.9992 
## F-statistic: 5.531e+06 on 2 and 8807 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      1 2008
## 2      0    0      0      0      1      0  594
## 3      0    0      0      0      1      1  491
## 4      0    0      0      0      2      0  198
## 5      0    0      0      0      2      1  115
## 6      0    0      0      0      3      0   23
## 7      0    0      0      0      3      1    7
## 8      0    0      0      0      4      0    4
## 9      0    0      0      1      0      0  538
## 10     0    0      0      1      0      1  190
## 11     0    0      0      1      1      0  216
## 12     0    0      0      1      1      1  127
## 13     0    0      0      1      2      0   47
## 14     0    0      0      1      2      1   12
## 15     0    0      0      1      3      0    2
## 16     0    0      0      2      0      0   84
## 17     0    0      0      2      0      1   16
## 18     0    0      0      2      1      0   32
## 19     0    0      0      2      1      1    7
## 20     0    0      0      2      2      0    6
## 21     0    0      0      2      2      1    1
## 22     0    0      0      2      3      0    2
## 23     0    0      0      3      0      0    6
## 24     0    0      0      3      1      0    1
## 25     0    0      1      0      0      0 1067
## 26     0    0      1      0      0      1  228
## 27     0    0      1      0      1      0  247
## 28     0    0      1      0      1      1  156
## 29     0    0      1      0      2      0   67
## 30     0    0      1      0      2      1    6
## 31     0    0      1      0      3      0    9
## 32     0    0      1      1      0      0  229
## 33     0    0      1      1      0      1   65
## 34     0    0      1      1      1      0   85
## 35     0    0      1      1      1      1   10
## 36     0    0      1      1      2      0   12
## 37     0    0      1      2      0      0   38
## 38     0    0      1      2      0      1    6
## 39     0    0      1      2      1      0   20
## 40     0    0      1      2      1      1    4
## 41     0    0      1      2      2      0    2
## 42     0    0      1      3      0      0    2
## 43     0    0      2      0      0      0  208
## 44     0    0      2      0      0      1   24
## 45     0    0      2      0      1      0   61
## 46     0    0      2      0      1      1    8
## 47     0    0      2      0      2      0    4
## 48     0    0      2      1      0      0   50
## 49     0    0      2      1      0      1    4
## 50     0    0      2      1      1      0   13
## 51     0    0      2      1      1      1    1
## 52     0    0      2      2      0      0    7
## 53     0    0      2      2      1      0    1
## 54     0    0      3      0      0      0    2
## 55     0    0      3      0      1      1    1
## 56     0    0      3      1      0      0    1
## 57     0    1      0      0      0      0   34
## 58     1    0      0      0      0      0 1418
## 
## *** METAR records where no clouds were extracted ***
## character(0)

## 
## *** Dimensions for the cloud matrix ***
## [1] 8817   13
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8817 obs. of  7 variables:
##   ..$ isCLR : num [1:8817] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV  : num [1:8817] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8817] 0 1 0 0 2 1 0 0 0 0 ...
##   ..$ numSCT: int [1:8817] 0 0 0 1 1 0 0 0 1 0 ...
##   ..$ numBKN: int [1:8817] 0 1 0 1 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8817] 1 1 1 0 0 0 1 1 0 0 ...
##  $ mtxCloud : chr [1:8817, 1:13] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,817 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   1800     NA     NA     NA
##  2    NA   7000   1700     NA   1200
##  3    NA   1900     NA     NA     NA
##  4    NA     NA   4100   1600     NA
##  5    NA     NA     NA  12000   1600
##  6    NA     NA     NA     NA  12000
##  7    NA   2600     NA     NA     NA
##  8    NA   2100     NA     NA     NA
##  9    NA     NA     NA   2300     NA
## 10    NA     NA     NA     NA     NA
## # ... with 8,807 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8817 obs. of  5 variables:
##   ..$ lowVV : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8817] 1800 7000 1900 NA NA NA 2600 2100 NA NA ...
##   ..$ lowBKN: num [1:8817] NA 1700 NA 4100 NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8817] NA NA NA 1600 12000 NA NA NA 2300 NA ...
##   ..$ lowFEW: num [1:8817] NA 1200 NA NA 1600 12000 NA NA NA NA ...
##  $ minCeilingLevel: num [1:8817] 1800 1700 1900 4100 999999 ...
##  $ minCloudLevel  : num [1:8817] 1800 1200 1900 1600 1600 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8817 obs. of  30 variables:
##  $ METAR     : chr  "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "53Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "53Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089" "53Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ WindDir   : chr  "270" "230" "250" "270" ...
##  $ WindSpeed : int  8 11 7 8 7 4 3 5 6 6 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  4 5 9 10 10 9 8 9 9 9 ...
##  $ TempC     : int  -5 -6 -6 -7 -8 -9 -8 -8 -8 -8 ...
##  $ DewC      : int  -8 -8 -9 -10 -11 -11 -11 -10 -10 -11 ...
##  $ Altimeter : int  3019 3019 3019 3019 3020 3020 3019 3019 3019 3019 ...
##  $ SLP       : int  237 237 239 238 241 242 237 236 237 239 ...
##  $ FahrC     : chr  "T10501083" "T10561078" "T10611089" "T10721100" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ origMETAR : chr  "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ TempF     : num  23 21.9 21 19 18 ...
##  $ DewF      : num  17.1 18 16 14 12 ...
##  $ modSLP    : num  1024 1024 1024 1024 1024 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 1 0 0 2 1 0 0 0 0 ...
##  $ numSCT    : int  0 0 0 1 1 0 0 0 1 0 ...
##  $ numBKN    : int  0 1 0 1 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 0 0 0 1 1 0 0 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  1800 7000 1900 NA NA NA 2600 2100 NA NA ...
##  $ lowBKN    : num  NA 1700 NA 4100 NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA 1600 12000 NA NA NA 2300 NA ...
##  $ lowFEW    : num  NA 1200 NA NA 1600 12000 NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 3 4 5 2 2 4 6 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(kmsp2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8817 obs. of  30 variables:
##   ..$ METAR     : chr [1:8817] "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "53Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "53Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089" "53Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ WindDir   : chr [1:8817] "270" "230" "250" "270" ...
##   ..$ WindSpeed : int [1:8817] 8 11 7 8 7 4 3 5 6 6 ...
##   ..$ WindGust  : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8817] " " " " " " " " ...
##   ..$ Visibility: num [1:8817] 4 5 9 10 10 9 8 9 9 9 ...
##   ..$ TempC     : int [1:8817] -5 -6 -6 -7 -8 -9 -8 -8 -8 -8 ...
##   ..$ DewC      : int [1:8817] -8 -8 -9 -10 -11 -11 -11 -10 -10 -11 ...
##   ..$ Altimeter : int [1:8817] 3019 3019 3019 3019 3020 3020 3019 3019 3019 3019 ...
##   ..$ SLP       : int [1:8817] 237 237 239 238 241 242 237 236 237 239 ...
##   ..$ FahrC     : chr [1:8817] "T10501083" "T10561078" "T10611089" "T10721100" ...
##   ..$ dtime     : POSIXct[1:8817], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8817] "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ TempF     : num [1:8817] 23 21.9 21 19 18 ...
##   ..$ DewF      : num [1:8817] 17.1 18 16 14 12 ...
##   ..$ modSLP    : num [1:8817] 1024 1024 1024 1024 1024 ...
##   ..$ isCLR     : num [1:8817] 0 0 0 0 0 0 0 0 0 1 ...
##   ..$ isVV      : num [1:8817] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8817] 0 1 0 0 2 1 0 0 0 0 ...
##   ..$ numSCT    : int [1:8817] 0 0 0 1 1 0 0 0 1 0 ...
##   ..$ numBKN    : int [1:8817] 0 1 0 1 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8817] 1 1 1 0 0 0 1 1 0 0 ...
##   ..$ lowVV     : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8817] 1800 7000 1900 NA NA NA 2600 2100 NA NA ...
##   ..$ lowBKN    : num [1:8817] NA 1700 NA 4100 NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8817] NA NA NA 1600 12000 NA NA NA 2300 NA ...
##   ..$ lowFEW    : num [1:8817] NA 1200 NA NA 1600 12000 NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 3 4 5 2 2 4 6 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8817 obs. of  29 variables:
##   ..$ station          : chr [1:8817] "MSP" "MSP" "MSP" "MSP" ...
##   ..$ valid            : POSIXct[1:8817], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ tmpf             : num [1:8817] 23 21.9 21 19 18 ...
##   ..$ dwpf             : num [1:8817] 17.1 18 16 14 12 ...
##   ..$ relh             : num [1:8817] 77.6 84.5 80.5 80.4 77.2 ...
##   ..$ drct             : num [1:8817] 270 230 250 270 260 230 220 230 220 230 ...
##   ..$ sknt             : num [1:8817] 8 11 7 8 7 4 3 5 6 6 ...
##   ..$ p01i             : chr [1:8817] "T" "T" "T" "T" ...
##   ..$ alti             : num [1:8817] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8817] 1024 1024 1024 1024 1024 ...
##   ..$ vsby             : num [1:8817] 4 5 9 10 10 9 8 9 9 9 ...
##   ..$ gust             : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8817] "OVC" "FEW" "OVC" "SCT" ...
##   ..$ skyc2            : chr [1:8817] NA "BKN" NA "BKN" ...
##   ..$ skyc3            : chr [1:8817] NA "OVC" NA NA ...
##   ..$ skyc4            : chr [1:8817] NA NA NA NA ...
##   ..$ skyl1            : num [1:8817] 1800 1200 1900 1600 1600 12000 2600 2100 2300 NA ...
##   ..$ skyl2            : num [1:8817] NA 1700 NA 4100 3900 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8817] NA 7000 NA NA 12000 NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8817] "-SN" "-SN BR" "-SN" NA ...
##   ..$ ice_accretion_1hr: chr [1:8817] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8817] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8817] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8817], format: NA NA ...
##   ..$ feel             : num [1:8817] 13.03 9.73 11.4 8.15 7.67 ...
##   ..$ metar            : chr [1:8817] "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_character(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_double(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_character(),
##   .. ..   ice_accretion_3hr = col_character(),
##   .. ..   ice_accretion_6hr = col_character(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8817 obs. of  32 variables:
##   ..$ station          : chr [1:8817] "MSP" "MSP" "MSP" "MSP" ...
##   ..$ valid            : POSIXct[1:8817], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ tmpf             : num [1:8817] 23 21.9 21 19 18 ...
##   ..$ dwpf             : num [1:8817] 17.1 18 16 14 12 ...
##   ..$ relh             : num [1:8817] 77.6 84.5 80.5 80.4 77.2 ...
##   ..$ drct             : num [1:8817] 270 230 250 270 260 230 220 230 220 230 ...
##   ..$ sknt             : num [1:8817] 8 11 7 8 7 4 3 5 6 6 ...
##   ..$ p01i             : chr [1:8817] "T" "T" "T" "T" ...
##   ..$ alti             : num [1:8817] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8817] 1024 1024 1024 1024 1024 ...
##   ..$ vsby             : num [1:8817] 4 5 9 10 10 9 8 9 9 9 ...
##   ..$ gust             : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8817] "OVC" "FEW" "OVC" "SCT" ...
##   ..$ skyc2            : chr [1:8817] NA "BKN" NA "BKN" ...
##   ..$ skyc3            : chr [1:8817] NA "OVC" NA NA ...
##   ..$ skyc4            : chr [1:8817] NA NA NA NA ...
##   ..$ skyl1            : num [1:8817] 1800 1200 1900 1600 1600 12000 2600 2100 2300 NA ...
##   ..$ skyl2            : num [1:8817] NA 1700 NA 4100 3900 NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8817] NA 7000 NA NA 12000 NA NA NA NA NA ...
##   ..$ skyl4            : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8817] "-SN" "-SN BR" "-SN" NA ...
##   ..$ ice_accretion_1hr: chr [1:8817] NA NA NA NA ...
##   ..$ ice_accretion_3hr: chr [1:8817] NA NA NA NA ...
##   ..$ ice_accretion_6hr: chr [1:8817] NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8817], format: NA NA ...
##   ..$ feel             : num [1:8817] 13.03 9.73 11.4 8.15 7.67 ...
##   ..$ metar            : chr [1:8817] "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ dirW             : chr [1:8817] "270" "230" "250" "270" ...
##   ..$ spdW             : num [1:8817] 8 11 7 8 7 4 3 5 6 6 ...
##   ..$ gustW            : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8817 obs. of  13 variables:
##   ..$ METAR     : chr [1:8817] "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "53Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "53Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089" "53Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ WindDir   : chr [1:8817] "270" "230" "250" "270" ...
##   ..$ WindSpeed : chr [1:8817] "08" "11" "07" "08" ...
##   ..$ WindGust  : chr [1:8817] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8817] " " " " " " " " ...
##   ..$ Visibility: chr [1:8817] "4SM" "5SM" "9SM" "10SM" ...
##   ..$ TempC     : chr [1:8817] "M05" "M06" "M06" "M07" ...
##   ..$ DewC      : chr [1:8817] "M08" "M08" "M09" "M10" ...
##   ..$ Altimeter : chr [1:8817] "A3019" "A3019" "A3019" "A3019" ...
##   ..$ SLP       : chr [1:8817] "SLP237" "SLP237" "SLP239" "SLP238" ...
##   ..$ FahrC     : chr [1:8817] "T10501083" "T10561078" "T10611089" "T10721100" ...
##   ..$ dtime     : POSIXct[1:8817], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8817] "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8817 obs. of  15 variables:
##   ..$ METAR     : chr [1:8817] "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "53Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "53Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089" "53Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ WindDir   : chr [1:8817] "270" "230" "250" "270" ...
##   ..$ WindSpeed : int [1:8817] 8 11 7 8 7 4 3 5 6 6 ...
##   ..$ WindGust  : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8817] " " " " " " " " ...
##   ..$ Visibility: num [1:8817] 4 5 9 10 10 9 8 9 9 9 ...
##   ..$ TempC     : int [1:8817] -5 -6 -6 -7 -8 -9 -8 -8 -8 -8 ...
##   ..$ DewC      : int [1:8817] -8 -8 -9 -10 -11 -11 -11 -10 -10 -11 ...
##   ..$ Altimeter : int [1:8817] 3019 3019 3019 3019 3020 3020 3019 3019 3019 3019 ...
##   ..$ SLP       : int [1:8817] 237 237 239 238 241 242 237 236 237 239 ...
##   ..$ FahrC     : chr [1:8817] "T10501083" "T10561078" "T10611089" "T10721100" ...
##   ..$ dtime     : POSIXct[1:8817], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8817] "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ TempF     : num [1:8817] 23 21.9 21 19 18 ...
##   ..$ DewF      : num [1:8817] 17.1 18 16 14 12 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8817 obs. of  16 variables:
##   ..$ METAR     : chr [1:8817] "53Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "53Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "53Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089" "53Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ WindDir   : chr [1:8817] "270" "230" "250" "270" ...
##   ..$ WindSpeed : int [1:8817] 8 11 7 8 7 4 3 5 6 6 ...
##   ..$ WindGust  : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8817] " " " " " " " " ...
##   ..$ Visibility: num [1:8817] 4 5 9 10 10 9 8 9 9 9 ...
##   ..$ TempC     : int [1:8817] -5 -6 -6 -7 -8 -9 -8 -8 -8 -8 ...
##   ..$ DewC      : int [1:8817] -8 -8 -9 -10 -11 -11 -11 -10 -10 -11 ...
##   ..$ Altimeter : int [1:8817] 3019 3019 3019 3019 3020 3020 3019 3019 3019 3019 ...
##   ..$ SLP       : int [1:8817] 237 237 239 238 241 242 237 236 237 239 ...
##   ..$ FahrC     : chr [1:8817] "T10501083" "T10561078" "T10611089" "T10721100" ...
##   ..$ dtime     : POSIXct[1:8817], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8817] "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##   ..$ TempF     : num [1:8817] 23 21.9 21 19 18 ...
##   ..$ DewF      : num [1:8817] 17.1 18 16 14 12 ...
##   ..$ modSLP    : num [1:8817] 1024 1024 1024 1024 1024 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8817 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8817] 0 0 0 0 0 0 0 0 0 1 ...
##   .. ..$ isVV  : num [1:8817] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8817] 0 1 0 0 2 1 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8817] 0 0 0 1 1 0 0 0 1 0 ...
##   .. ..$ numBKN: int [1:8817] 0 1 0 1 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8817] 1 1 1 0 0 0 1 1 0 0 ...
##   ..$ mtxCloud : chr [1:8817, 1:13] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8817 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8817] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8817] 1800 7000 1900 NA NA NA 2600 2100 NA NA ...
##   .. ..$ lowBKN: num [1:8817] NA 1700 NA 4100 NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8817] NA NA NA 1600 12000 NA NA NA 2300 NA ...
##   .. ..$ lowFEW: num [1:8817] NA 1200 NA NA 1600 12000 NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8817] 1800 1700 1900 4100 999999 ...
##   ..$ minCloudLevel  : num [1:8817] 1800 1200 1900 1600 1600 ...

Create the base METAR file for Detroit, MI 2016 data:

# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_kdtw_2016.txt"  # file name for raw METAR data
timeZ <- "53Z"  # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:53:00", tz="UTC")  # Expected first time read
expDays <- 368  # Expected total days read
locMET <- "Detroit, MI"  # Description of city or location
shortMET <- "KDTW METAR (2016)"  # Station code and timing
longMET <- "Detroit, MI Hourly METAR (2016)"  # Description of city or location and timing

# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")

# Run the process for Detroit, MI (2016)
kdtw2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays, 
                             locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
                             )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   station = col_character(),
##   valid = col_datetime(format = ""),
##   p01i = col_character(),
##   skyc1 = col_character(),
##   skyc2 = col_character(),
##   skyc3 = col_character(),
##   skyc4 = col_logical(),
##   skyl4 = col_logical(),
##   wxcodes = col_character(),
##   ice_accretion_1hr = col_logical(),
##   ice_accretion_3hr = col_logical(),
##   ice_accretion_6hr = col_logical(),
##   peak_wind_time = col_datetime(format = ""),
##   metar = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 7 parsing failures.
##   row               col           expected   actual                                file
##  4170 skyc4             1/0/T/F/TRUE/FALSE OVC      './RInputFiles/metar_kdtw_2016.txt'
##  4170 skyl4             1/0/T/F/TRUE/FALSE 23000.00 './RInputFiles/metar_kdtw_2016.txt'
## 10271 ice_accretion_1hr 1/0/T/F/TRUE/FALSE 0.01     './RInputFiles/metar_kdtw_2016.txt'
## 10272 ice_accretion_1hr 1/0/T/F/TRUE/FALSE 0.01     './RInputFiles/metar_kdtw_2016.txt'
## 10273 ice_accretion_1hr 1/0/T/F/TRUE/FALSE 0.01     './RInputFiles/metar_kdtw_2016.txt'
## ..... ................. .................. ........ ...................................
## See problems(...) for more details.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10718 obs. of  29 variables:
##  $ station          : chr  "DTW" "DTW" "DTW" "DTW" ...
##  $ valid            : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ tmpf             : num  36 36 35.1 35.1 35.1 ...
##  $ dwpf             : num  26.1 27 27 25 24.1 ...
##  $ relh             : num  67 69.5 72 66.4 63.9 ...
##  $ drct             : num  230 220 240 240 230 230 250 250 250 230 ...
##  $ sknt             : num  7 9 9 10 8 8 6 8 7 6 ...
##  $ p01i             : chr  "0.00" "0.00" "0.00" "0.00" ...
##  $ alti             : num  30.2 30.2 30.2 30.2 30.2 ...
##  $ mslp             : num  1022 1023 1022 1022 1022 ...
##  $ vsby             : num  10 10 10 10 10 10 10 10 10 10 ...
##  $ gust             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyc1            : chr  "BKN" "OVC" "OVC" "OVC" ...
##  $ skyc2            : chr  "OVC" NA NA NA ...
##  $ skyc3            : chr  NA NA NA NA ...
##  $ skyc4            : logi  NA NA NA NA NA NA ...
##  $ skyl1            : num  2500 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##  $ skyl2            : num  5000 NA NA NA NA NA NA NA NA NA ...
##  $ skyl3            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ skyl4            : logi  NA NA NA NA NA NA ...
##  $ wxcodes          : chr  NA NA NA NA ...
##  $ ice_accretion_1hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_3hr: logi  NA NA NA NA NA NA ...
##  $ ice_accretion_6hr: logi  NA NA NA NA NA NA ...
##  $ peak_wind_gust   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_drct   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ peak_wind_time   : POSIXct, format: NA NA ...
##  $ feel             : num  29.6 28.4 27.3 26.8 27.9 ...
##  $ metar            : chr  "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
## 
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
##  [1] "2016-02-12 19:53:00 UTC" "2016-03-31 23:53:00 UTC"
##  [3] "2016-06-17 23:53:00 UTC" "2016-06-18 00:53:00 UTC"
##  [5] "2016-07-13 14:53:00 UTC" "2016-07-13 15:53:00 UTC"
##  [7] "2016-07-13 16:53:00 UTC" "2016-07-13 17:53:00 UTC"
##  [9] "2016-07-13 23:53:00 UTC" "2016-08-05 07:53:00 UTC"
## [11] "2016-08-16 13:53:00 UTC" "2016-11-21 00:53:00 UTC"
## [13] "2016-12-03 07:53:00 UTC" "2016-12-03 08:53:00 UTC"
## 
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
## 
## *** Are the extracted records unique? ***
## [1] TRUE
## 
## 
## *** First 6 winds and parsing ***
##      [,1]      [,2]  [,3] [,4]
## [1,] "23007KT" "230" "07" NA  
## [2,] "22009KT" "220" "09" NA  
## [3,] "24009KT" "240" "09" NA  
## [4,] "24010KT" "240" "10" NA  
## [5,] "23008KT" "230" "08" NA  
## [6,] "23008KT" "230" "08" NA  
## 
## *** Table of WIND DIRECTION ***
## 
##  000  010  020  030  040  050  060  070  080  090  100  110  120  130  140  150 
##  859  195  223  206  141  115  114  136  139  131   95   95   98  127  172  201 
##  160  170  180  190  200  210  220  230  240  250  260  270  280  290  300  310 
##  242  255  302  354  378  390  419  406  289  292  221  243  298  264  258  199 
##  320  330  340  350  360  VRB <NA> 
##  178  148  134  149  177  167    8 
## 
## *** Table of WIND SPEED ***
## 
##   00   03   04   05   06   07   08   09   10   11   12   13   14   15   16   17 
##  859  733  806  908  900  786  696  561  530  458  380  273  232  192  130  110 
##   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33 
##   75   39   42   34   18    8    8    6    4    7    4    3    2    2    1    1 
##   34   36 <NA> 
##    1    1    8 
## 
## *** Table of WIND GUST ***
## 
##  G14  G15  G16  G17  G18  G19  G20  G21  G22  G23  G24  G25  G26  G27  G28  G29 
##   29   52   51   74   78   88  102   93   81   69   61   56   48   39   32   20 
##  G30  G31  G32  G33  G34  G35  G36  G37  G38  G39  G40  G41  G42  G43  G45  G46 
##   15   21    7    6    8    2    3    5    5    1    2    2    1    2    2    1 
##  G48 <NA> 
##    1 7761 
## 
##  *** WIND DATA WAS NOT CAPTURED FROM: *** 
## # A tibble: 8 x 1
##   metar                                                                         
##   <chr>                                                                         
## 1 KDTW 110753Z 4SM -SN BLSN FEW006 BKN020 OVC035 M10/M14 A2995 RMK AO2 SLP152 P~
## 2 KDTW 161453Z 22019G22 10SM BKN014 OVC030 24/22 A2993 RMK SLP130 60000 T024402~
## 3 KDTW 161653Z 28011G19 10SM SCT015 OVC025 24/21 A2997 RMK RAE17 SLP144 P0001 T~
## 4 KDTW 161753Z 30011 10SM SCT015 BKN025 OVC050 26/21 A2999 RMK SLP150 60001 T02~
## 5 KDTW 161853Z 29010 10SM -RA SCT015 BKN029 OVC050 26/20 A3000 RMK RAB49 SLP154~
## 6 KDTW 231753Z 23008 10SM FEW050 SCT250 27/15 A3025 RMK SLPNO T02690145         
## 7 KDTW 011453Z 35008 10SM FEW008 BKN015 23/13 A3010 RMK SLPNO T02280132         
## 8 KDTW 161653Z 7SM FEW025 13/07 A2992 RMK AO2 SLP132 T01330072

## Warning: Removed 8 rows containing non-finite values (stat_count).

## Warning: Removed 6 rows containing missing values (geom_point).
## 
## *** Tentative Summary of Element Parsing *** 
## .
## FALSE  TRUE 
##    49  8769 
## 
## *** Data Not Matched *** 
##  [1] "KDTW 110753Z 4SM -SN BLSN FEW006 BKN020 OVC035 M10/M14 A2995 RMK AO2 SLP152 P0000 T11001139 $"           
##  [2] "KDTW 250653Z 30012KT 2SM -SN BR BKN007 OVC018 M01/M02 A2933 RMK AO2 TWR VIS 4 P0000 $"                   
##  [3] "KDTW 221553Z 18014KT 10SM SCT080 BKN230 11/00 A2986 RMK SLPNO T01170000"                                 
##  [4] "KDTW 230353Z 19010KT 10SM FEW130 OVC180 12/02 A2976 RMK AO2 SLPNO T01220022"                             
##  [5] "KDTW 140753Z 30005KT 8SM FEW047 SCT200 22/21 A2992 RMK SLPNO T02220206"                                  
##  [6] "KDTW 140853Z 31005KT 10SM FEW045 SCT200 22/20 A2993 RMK SLPNO T02170200"                                 
##  [7] "KDTW 140953Z 29005KT 10SM FEW080 SCT200 21/20 A2996 RMK SLPNO T02110200"                                 
##  [8] "KDTW 142053Z 24004KT 10SM FEW030 BKN045 OVC240 27/18 A3003 RMK AO2 SLPNO T02720183 58005"                
##  [9] "KDTW 142353Z 21008KT 8SM -RA SCT060 OVC080 24/20 A3006 RMK SLP177 60021"                                 
## [10] "KDTW 150253Z 20004KT 8SM SCT070 OVC100 22/19 A3011 RMK SLPNO T02170194"                                  
## [11] "KDTW 151353Z 00000KT 1SM R03R/3000V6000FT HZ BKN002 OVC017 23/20 A3016 RMK SFC VIS 1 1/2 SLPNO T02280200"
## [12] "KDTW 161453Z 22019G22 10SM BKN014 OVC030 24/22 A2993 RMK SLP130 60000 T02440211 53001"                   
## [13] "KDTW 161653Z 28011G19 10SM SCT015 OVC025 24/21 A2997 RMK RAE17 SLP144 P0001 T02440206"                   
## [14] "KDTW 161753Z 30011 10SM SCT015 BKN025 OVC050 26/21 A2999 RMK SLP150 60001 T02610206 10261 20233 51020"   
## [15] "KDTW 161853Z 29010 10SM -RA SCT015 BKN029 OVC050 26/20 A3000 RMK RAB49 SLP154 P0000 T02610200"           
## [16] "KDTW 231653Z 21010KT 10SM SCT025 BKN250 27/15 A3027 RMK SLPNO T02730150"                                 
## [17] "KDTW 231753Z 23008 10SM FEW050 SCT250 27/15 A3025 RMK SLPNO T02690145"                                   
## [18] "KDTW 231853Z 21010KT 10SM SCT050 BKN250 26/14 A3025 RMK AO2 SLPNO T02610144"                             
## [19] "KDTW 231953Z 21010KT 10SM FEW050 BKN250 27/15 A3023 RMK AO2 SLPNO T02670150"                             
## [20] "KDTW 232053Z 23008KT 10SM FEW050 BKN250 27/16 A3022 RMK AO2 SLPNO T02670156"                             
## [21] "KDTW 232153Z 20011KT 10SM FEW050 BKN250 27/15 A3022 RMK AO2 SLPNO T02670150"                             
## [22] "KDTW 232253Z 22011KT 10SM FEW050 BKN250 26/15 A3021 RMK AO2 SLPNO T02610150"                             
## [23] "KDTW 232353Z 21006KT 10SM FEW050 SCT250 25/16 A3021 RMK AO2 SLPNO T02500156 10272 20250 56004"           
## [24] "KDTW 240053Z 19006KT 10SM SCT220 24/16 A3022 RMK AO2 SLPNO T02390161"                                    
## [25] "KDTW 240153Z 17004KT 10SM SCT220 22/17 A3022 RMK AO2 SLPNO T02220167"                                    
## [26] "KDTW 240253Z 17004KT 10SM SCT220 21/16 A3021 RMK AO2 SLPNO T02110161 50000"                              
## [27] "KDTW 240353Z 18005KT 10SM SCT220 20/16 A3020 RMK AO2 SLPNO T02000161"                                    
## [28] "KDTW 240453Z 18003KT 10SM SCT220 20/16 A3019 RMK AO2 SLPNO T02000161 402720194"                          
## [29] "KDTW 240553Z 18005KT 10SM SCT220 19/17 A3018 RMK AO2 SLPNO T01940167 10250 20194 56010"                  
## [30] "KDTW 010053Z 01005KT 10SM FEW050 SCT220 22/13 A3001 RMK SLPNO T02220133"                                 
## [31] "KDTW 010153Z 01007KT 10SM FEW200 21/13 A3003 RMK SLPNO T02110133"                                        
## [32] "KDTW 010253Z 03007KT 10SM FEW200 20/13 A3004 RMK SLPNO T02000128"                                        
## [33] "KDTW 010353Z 03005KT 10SM FEW220 18/13 A3005 RMK SLPNO T01830128"                                        
## [34] "KDTW 010453Z 00000KT 9SM FEW220 18/13 A3004 RMK SLPNO T01780133"                                         
## [35] "KDTW 010553Z 02005KT 9SM FEW055 SCT220 17/13 A3004 RMK SLPNO T01720133"                                  
## [36] "KDTW 010653Z 03005KT 8SM SCT055 BKN220 17/13 A3004 RMK SLPNO T01720133"                                  
## [37] "KDTW 010753Z 35005KT 8SM BKN055 BKN220 17/14 A3005 RMK SLPNO T01670139"                                  
## [38] "KDTW 010853Z 35007KT 9SM BKN060 OVC220 17/13 A3005 RMK SLPNO T01670133"                                  
## [39] "KDTW 010953Z 35008KT 10SM BKN060 OVC220 17/13 A3005 RMK SLPNO T01720128"                                 
## [40] "KDTW 011053Z 35009KT 10SM SCT060 OVC220 17/13 A3006 RMK SLPNO T01720133"                                 
## [41] "KDTW 011153Z 34008KT 10SM FEW060 SCT220 18/13 A3007 RMK SLPNO T01830133"                                 
## [42] "KDTW 011253Z 02006KT 10SM SCT015 BKN220 19/15 A3007 RMK SLPNO T01970154"                                 
## [43] "KDTW 011353Z 35007KT 10SM SCT015 BKN220 23/14 A3008 RMK SLPNO T02290137"                                 
## [44] "KDTW 011453Z 35008 10SM FEW008 BKN015 23/13 A3010 RMK SLPNO T02280132"                                   
## [45] "KDTW 011553Z 01010KT 10SM BKN025 BKN060 23/13 A3010 RMK SLPNO T02260132"                                 
## [46] "KDTW 011653Z 03010KT 10SM SCT035 BKN075 A3010 RMK SLPNO T02310133"                                       
## [47] "KDTW 011753Z 01010KT 10SM SCT050 BKN080 22/13 A3010 RMK SLPNO T02230128"                                 
## [48] "KDTW 161653Z 7SM FEW025 13/07 A2992 RMK AO2 SLP132 T01330072"                                            
## [49] "KDTW 071453Z 23010KT 7SM FEW040 BKN130 01/M04 A3004"                                                     
## 
## *** Parsing matrix summary *** 
## [1] 8818   11
##      [,1]                                                                  
## [1,] "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033"
## [2,] "53Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028"       
## [3,] "53Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028"       
## [4,] "53Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039"       
## [5,] "53Z 23008KT 10SM OVC025 02/M04 A3017 RMK AO2 SLP224 T00171044"       
## [6,] "53Z 23008KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039"       
##      [,2]  [,3] [,4] [,5] [,6]   [,7] [,8]  [,9]    [,10]    [,11]      
## [1,] "230" "07" NA   " "  "10SM" "02" "M03" "A3017" "SLP223" "T00221033"
## [2,] "220" "09" NA   " "  "10SM" "02" "M03" "A3019" "SLP229" "T00221028"
## [3,] "240" "09" NA   " "  "10SM" "02" "M03" "A3018" "SLP224" "T00171028"
## [4,] "240" "10" NA   " "  "10SM" "02" "M04" "A3018" "SLP225" "T00171039"
## [5,] "230" "08" NA   " "  "10SM" "02" "M04" "A3017" "SLP224" "T00171044"
## [6,] "230" "08" NA   " "  "10SM" "02" "M04" "A3018" "SLP225" "T00171039"
## 
## *** Summary of the parsed data *** 
## Observations: 8,818
## Variables: 13
## $ METAR      <chr> "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP...
## $ WindDir    <chr> "230", "220", "240", "240", "230", "230", "250", "250", ...
## $ WindSpeed  <chr> "07", "09", "09", "10", "08", "08", "06", "08", "07", "0...
## $ WindGust   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy      <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC      <chr> "02", "02", "02", "02", "02", "02", "01", "01", "01", "0...
## $ DewC       <chr> "M03", "M03", "M03", "M04", "M04", "M04", "M04", "M04", ...
## $ Altimeter  <chr> "A3017", "A3019", "A3018", "A3018", "A3017", "A3018", "A...
## $ SLP        <chr> "SLP223", "SLP229", "SLP224", "SLP225", "SLP224", "SLP22...
## $ FahrC      <chr> "T00221033", "T00221028", "T00171028", "T00171039", "T00...
## $ dtime      <dttm> 2015-12-31 00:53:00, 2015-12-31 01:53:00, 2015-12-31 02...
## $ origMETAR  <chr> "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RM...
## Warning: NAs introduced by coercion

## 
##  *** Parsed data structure, head, tail, and random sample *** 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  15 variables:
##  $ METAR     : chr  "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "53Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "53Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028" "53Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ WindDir   : chr  "230" "220" "240" "240" ...
##  $ WindSpeed : int  7 9 9 10 8 8 6 8 7 6 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  2 2 2 2 2 2 1 1 1 1 ...
##  $ DewC      : int  -3 -3 -3 -4 -4 -4 -4 -4 -4 -4 ...
##  $ Altimeter : int  3017 3019 3018 3018 3017 3018 3016 3017 3019 3019 ...
##  $ SLP       : int  223 229 224 225 224 225 220 223 228 230 ...
##  $ FahrC     : chr  "T00221033" "T00221028" "T00171028" "T00171039" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ origMETAR : chr  "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ TempF     : num  36 36 35.1 35.1 35.1 ...
##  $ DewF      : num  26.1 27 27 25 24.1 ...
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 53Z ~ 230             7       NA " "           10     2    -3      3017   223
## 2 53Z ~ 220             9       NA " "           10     2    -3      3019   229
## 3 53Z ~ 240             9       NA " "           10     2    -3      3018   224
## 4 53Z ~ 240            10       NA " "           10     2    -4      3018   225
## 5 53Z ~ 230             8       NA " "           10     2    -4      3017   224
## 6 53Z ~ 230             8       NA " "           10     2    -4      3018   225
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
##   METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##   <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
## 1 53Z ~ 190            11       NA " "            8     4    -4      3016   219
## 2 53Z ~ 210            10       NA " "            8     4    -5      3016   220
## 3 53Z ~ 180             8       NA " "            8     4    -4      3018   226
## 4 53Z ~ 180             6       NA " "            8     2    -4      3020   233
## 5 53Z ~ VRB             3       NA " "            8     1    -5      3021   239
## 6 53Z ~ 180             3       NA " "            7     1    -4      3024   247
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
##    METAR WindDir WindSpeed WindGust Dummy Visibility TempC  DewC Altimeter   SLP
##    <chr> <chr>       <int>    <dbl> <chr>      <dbl> <int> <int>     <int> <int>
##  1 53Z ~ 000             0       NA " "           10    23    17      2986   105
##  2 53Z ~ 340             9       NA " "           10    22     4      2984   103
##  3 53Z ~ 260            12       NA " "            8     1    -2      2989   128
##  4 53Z ~ 050             4       NA " "           10     8    -1      2994   139
##  5 53Z ~ 220            15       NA " "            9    -7   -16      3002   176
##  6 53Z ~ 030             4       NA " "            7    21    15      2986   109
##  7 <NA>  <NA>           NA       NA  <NA>         NA    NA    NA        NA    NA
##  8 53Z ~ 240            13       NA " "           10     0    -4      3005   181
##  9 53Z ~ 110             9       NA " 1 ~          4    21    21      2983    96
## 10 53Z ~ 000             0       NA " "           10    16    13      3021   230
## 11 53Z ~ 040             6       NA " "           10    17    13      3028   252
## 12 53Z ~ 070             5       NA " "           10    16     1      3017   216
## 13 53Z ~ 240             9       NA " "            6     2    -2      3004   180
## 14 53Z ~ 000             0       NA " "            8    16    13      3001   161
## 15 53Z ~ 290             9       NA " "            8   -13   -18      3011   208
## 16 53Z ~ 220             9       NA " "            4    18    17      2963    31
## 17 53Z ~ 340             7       NA " 1 ~          4     8     7      2998   154
## 18 53Z ~ 060             3       NA " "            6     1    -3      2999   160
## 19 53Z ~ 250             5       NA " "           10     2    -3      3029   263
## 20 53Z ~ 150             6       NA " "           10    23    14      2998   150
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## #   TempF <dbl>, DewF <dbl>
## 
##  *** Number of NA values *** 
##      METAR    WindDir  WindSpeed   WindGust      Dummy Visibility      TempC 
##         49         49         49       8818         49         49         49 
##       DewC  Altimeter        SLP      FahrC      dtime  origMETAR      TempF 
##         49         49         49         49          0          0         49 
##       DewF 
##         49

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## # A tibble: 21 x 2
##    Visibility     n
##         <dbl> <int>
##  1      0         2
##  2      0.125    16
##  3      0.25     20
##  4      0.5      16
##  5      0.75     29
##  6      1        42
##  7      1.25     22
##  8      1.5      76
##  9      1.75     28
## 10      2       112
## # ... with 11 more rows
##    WindGust    n
## 1        14   29
## 2        15   49
## 3        16   51
## 4        17   73
## 5        18   76
## 6        19   88
## 7        20  102
## 8        21   93
## 9        22   80
## 10       23   69
## 11       24   61
## 12       25   56
## 13       26   48
## 14       27   39
## 15       28   32
## 16       29   20
## 17       30   15
## 18       31   21
## 19       32    7
## 20       33    6
## 21       34    8
## 22       35    2
## 23       36    3
## 24       37    5
## 25       38    5
## 26       39    1
## 27       40    2
## 28       41    2
## 29       42    1
## 30       43    2
## 31       45    2
## 32       46    1
## 33       48    1
## 34       NA 7768
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (position_stack).

## 
##  *** Correlations use 8769 complete cases (99.4% of 8818 total) ***
##            TempC TempF  DewC  DewF Altimeter modSLP WindSpeed Visibility
## TempC       1.00  1.00  0.92  0.92     -0.17  -0.24     -0.10       0.14
## TempF       1.00  1.00  0.92  0.92     -0.17  -0.24     -0.10       0.14
## DewC        0.92  0.92  1.00  1.00     -0.22  -0.28     -0.18      -0.06
## DewF        0.92  0.92  1.00  1.00     -0.22  -0.28     -0.18      -0.06
## Altimeter  -0.17 -0.17 -0.22 -0.22      1.00   1.00     -0.37       0.17
## modSLP     -0.24 -0.24 -0.28 -0.28      1.00   1.00     -0.35       0.16
## WindSpeed  -0.10 -0.10 -0.18 -0.18     -0.37  -0.35      1.00       0.08
## Visibility  0.14  0.14 -0.06 -0.06      0.17   0.16      0.08       1.00

## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.96361 -0.44022 -0.03758  0.40772  1.41448 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.644e+01  7.472e-01  -22.01   <2e-16 ***
## Altimeter    3.442e-01  2.488e-04 1383.17   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4961 on 8767 degrees of freedom
##   (49 observations deleted due to missingness)
## Multiple R-squared:  0.9954, Adjusted R-squared:  0.9954 
## F-statistic: 1.913e+06 on 1 and 8767 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
##  *** Regression call is: modSLP ~ Altimeter + TempF ***
## 
## Call:
## lm(formula = formula(myChar), data = met)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57802 -0.12674  0.00481  0.12820  0.65708 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.713e+00  2.803e-01  -13.25   <2e-16 ***
## Altimeter    3.403e-01  9.302e-05 3658.80   <2e-16 ***
## TempF       -2.351e-02  9.941e-05 -236.48   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1826 on 8766 degrees of freedom
##   (49 observations deleted due to missingness)
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9994 
## F-statistic: 7.086e+06 on 2 and 8766 DF,  p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

## Warning: Removed 1 rows containing missing values (geom_point).

## 
## *** Counts by number of layers of each cloud type ***
##    isCLR isVV numFEW numSCT numBKN numOVC    n
## 1      0    0      0      0      0      1 1181
## 2      0    0      0      0      1      0  541
## 3      0    0      0      0      1      1  747
## 4      0    0      0      0      2      0  257
## 5      0    0      0      0      2      1  152
## 6      0    0      0      0      3      0   15
## 7      0    0      0      1      0      0  527
## 8      0    0      0      1      0      1  374
## 9      0    0      0      1      1      0  438
## 10     0    0      0      1      1      1  503
## 11     0    0      0      1      2      0  162
## 12     0    0      0      2      0      0  155
## 13     0    0      0      2      0      1   16
## 14     0    0      0      2      1      0   51
## 15     0    0      0      3      0      0    1
## 16     0    0      1      0      0      0  911
## 17     0    0      1      0      0      1  249
## 18     0    0      1      0      1      0  368
## 19     0    0      1      0      1      1  324
## 20     0    0      1      0      2      0  135
## 21     0    0      1      1      0      0  543
## 22     0    0      1      1      0      1  103
## 23     0    0      1      1      1      0  233
## 24     0    0      1      1      1      1    1
## 25     0    0      1      2      0      0   61
## 26     0    0      2      0      0      0   86
## 27     0    0      2      0      0      1    1
## 28     0    0      2      0      1      0    3
## 29     0    0      2      1      0      0   11
## 30     0    1      0      0      0      0   66
## 31     1    0      0      0      0      0  602
## 32     1    0      0      1      0      0    1
## 
## *** METAR records where no clouds were extracted ***
## character(0)

## 
## *** Dimensions for the cloud matrix ***
## [1] 8818   11
## List of 2
##  $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':   8818 obs. of  7 variables:
##   ..$ isCLR : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV  : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN: int [1:8818] 1 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC: int [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##  $ mtxCloud : chr [1:8818, 1:11] "" "" "" "" ...
## 
## *** Lowest clouds by type tibble ***
## # A tibble: 8,818 x 5
##    lowVV lowOVC lowBKN lowSCT lowFEW
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1    NA   5000   2500     NA     NA
##  2    NA   2100     NA     NA     NA
##  3    NA   2100     NA     NA     NA
##  4    NA   2500     NA     NA     NA
##  5    NA   2500     NA     NA     NA
##  6    NA   2500     NA     NA     NA
##  7    NA   2500     NA     NA     NA
##  8    NA   2000     NA     NA     NA
##  9    NA   2000     NA     NA     NA
## 10    NA   2000     NA     NA     NA
## # ... with 8,808 more rows

## List of 3
##  $ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  5 variables:
##   ..$ lowVV : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC: num [1:8818] 5000 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##   ..$ lowBKN: num [1:8818] 2500 NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##  $ minCeilingLevel: num [1:8818] 2500 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##  $ minCloudLevel  : num [1:8818] 2500 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  30 variables:
##  $ METAR     : chr  "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "53Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "53Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028" "53Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ WindDir   : chr  "230" "220" "240" "240" ...
##  $ WindSpeed : int  7 9 9 10 8 8 6 8 7 6 ...
##  $ WindGust  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dummy     : chr  " " " " " " " " ...
##  $ Visibility: num  10 10 10 10 10 10 10 10 10 10 ...
##  $ TempC     : int  2 2 2 2 2 2 1 1 1 1 ...
##  $ DewC      : int  -3 -3 -3 -4 -4 -4 -4 -4 -4 -4 ...
##  $ Altimeter : int  3017 3019 3018 3018 3017 3018 3016 3017 3019 3019 ...
##  $ SLP       : int  223 229 224 225 224 225 220 223 228 230 ...
##  $ FahrC     : chr  "T00221033" "T00221028" "T00171028" "T00171039" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ origMETAR : chr  "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ TempF     : num  36 36 35.1 35.1 35.1 ...
##  $ DewF      : num  26.1 27 27 25 24.1 ...
##  $ modSLP    : num  1022 1023 1022 1022 1022 ...
##  $ isCLR     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ isVV      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ htVV      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ numFEW    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numSCT    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numBKN    : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ numOVC    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ lowVV     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowOVC    : num  5000 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##  $ lowBKN    : num  2500 NA NA NA NA NA NA NA NA NA ...
##  $ lowSCT    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lowFEW    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...

str(kdtw2016METAR)
## List of 8
##  $ fullMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  30 variables:
##   ..$ METAR     : chr [1:8818] "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "53Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "53Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028" "53Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ WindDir   : chr [1:8818] "230" "220" "240" "240" ...
##   ..$ WindSpeed : int [1:8818] 7 9 9 10 8 8 6 8 7 6 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 2 2 2 2 2 2 1 1 1 1 ...
##   ..$ DewC      : int [1:8818] -3 -3 -3 -4 -4 -4 -4 -4 -4 -4 ...
##   ..$ Altimeter : int [1:8818] 3017 3019 3018 3018 3017 3018 3016 3017 3019 3019 ...
##   ..$ SLP       : int [1:8818] 223 229 224 225 224 225 220 223 228 230 ...
##   ..$ FahrC     : chr [1:8818] "T00221033" "T00221028" "T00171028" "T00171039" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8818] "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ TempF     : num [1:8818] 36 36 35.1 35.1 35.1 ...
##   ..$ DewF      : num [1:8818] 26.1 27 27 25 24.1 ...
##   ..$ modSLP    : num [1:8818] 1022 1023 1022 1022 1022 ...
##   ..$ isCLR     : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ isVV      : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ htVV      : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ numFEW    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numSCT    : int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ numBKN    : int [1:8818] 1 0 0 0 0 0 0 0 0 0 ...
##   ..$ numOVC    : int [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ lowVV     : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowOVC    : num [1:8818] 5000 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##   ..$ lowBKN    : num [1:8818] 2500 NA NA NA NA NA NA NA NA NA ...
##   ..$ lowSCT    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ lowFEW    : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ wType     : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ month     : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ funcMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  29 variables:
##   ..$ station          : chr [1:8818] "DTW" "DTW" "DTW" "DTW" ...
##   ..$ valid            : POSIXct[1:8818], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ tmpf             : num [1:8818] 36 36 35.1 35.1 35.1 ...
##   ..$ dwpf             : num [1:8818] 26.1 27 27 25 24.1 ...
##   ..$ relh             : num [1:8818] 67 69.5 72 66.4 63.9 ...
##   ..$ drct             : num [1:8818] 230 220 240 240 230 230 250 250 250 230 ...
##   ..$ sknt             : num [1:8818] 7 9 9 10 8 8 6 8 7 6 ...
##   ..$ p01i             : chr [1:8818] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8818] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8818] 1022 1023 1022 1022 1022 ...
##   ..$ vsby             : num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8818] "BKN" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8818] "OVC" NA NA NA ...
##   ..$ skyc3            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8818] 2500 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##   ..$ skyl2            : num [1:8818] 5000 NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8818] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8818], format: NA NA ...
##   ..$ feel             : num [1:8818] 29.6 28.4 27.3 26.8 27.9 ...
##   ..$ metar            : chr [1:8818] "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..- attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame':  7 obs. of  5 variables:
##   .. ..$ row     : int [1:7] 4170 4170 10271 10272 10273 10273 10278
##   .. ..$ col     : chr [1:7] "skyc4" "skyl4" "ice_accretion_1hr" "ice_accretion_1hr" ...
##   .. ..$ expected: chr [1:7] "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
##   .. ..$ actual  : chr [1:7] "OVC" "23000.00" "0.01" "0.01" ...
##   .. ..$ file    : chr [1:7] "'./RInputFiles/metar_kdtw_2016.txt'" "'./RInputFiles/metar_kdtw_2016.txt'" "'./RInputFiles/metar_kdtw_2016.txt'" "'./RInputFiles/metar_kdtw_2016.txt'" ...
##   ..- attr(*, "spec")=
##   .. .. cols(
##   .. ..   station = col_character(),
##   .. ..   valid = col_datetime(format = ""),
##   .. ..   tmpf = col_double(),
##   .. ..   dwpf = col_double(),
##   .. ..   relh = col_double(),
##   .. ..   drct = col_double(),
##   .. ..   sknt = col_double(),
##   .. ..   p01i = col_character(),
##   .. ..   alti = col_double(),
##   .. ..   mslp = col_double(),
##   .. ..   vsby = col_double(),
##   .. ..   gust = col_double(),
##   .. ..   skyc1 = col_character(),
##   .. ..   skyc2 = col_character(),
##   .. ..   skyc3 = col_character(),
##   .. ..   skyc4 = col_logical(),
##   .. ..   skyl1 = col_double(),
##   .. ..   skyl2 = col_double(),
##   .. ..   skyl3 = col_double(),
##   .. ..   skyl4 = col_logical(),
##   .. ..   wxcodes = col_character(),
##   .. ..   ice_accretion_1hr = col_logical(),
##   .. ..   ice_accretion_3hr = col_logical(),
##   .. ..   ice_accretion_6hr = col_logical(),
##   .. ..   peak_wind_gust = col_double(),
##   .. ..   peak_wind_drct = col_double(),
##   .. ..   peak_wind_time = col_datetime(format = ""),
##   .. ..   feel = col_double(),
##   .. ..   metar = col_character()
##   .. .. )
##  $ windMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  32 variables:
##   ..$ station          : chr [1:8818] "DTW" "DTW" "DTW" "DTW" ...
##   ..$ valid            : POSIXct[1:8818], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ tmpf             : num [1:8818] 36 36 35.1 35.1 35.1 ...
##   ..$ dwpf             : num [1:8818] 26.1 27 27 25 24.1 ...
##   ..$ relh             : num [1:8818] 67 69.5 72 66.4 63.9 ...
##   ..$ drct             : num [1:8818] 230 220 240 240 230 230 250 250 250 230 ...
##   ..$ sknt             : num [1:8818] 7 9 9 10 8 8 6 8 7 6 ...
##   ..$ p01i             : chr [1:8818] "0.00" "0.00" "0.00" "0.00" ...
##   ..$ alti             : num [1:8818] 30.2 30.2 30.2 30.2 30.2 ...
##   ..$ mslp             : num [1:8818] 1022 1023 1022 1022 1022 ...
##   ..$ vsby             : num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ gust             : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyc1            : chr [1:8818] "BKN" "OVC" "OVC" "OVC" ...
##   ..$ skyc2            : chr [1:8818] "OVC" NA NA NA ...
##   ..$ skyc3            : chr [1:8818] NA NA NA NA ...
##   ..$ skyc4            : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ skyl1            : num [1:8818] 2500 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##   ..$ skyl2            : num [1:8818] 5000 NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl3            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ skyl4            : logi [1:8818] NA NA NA NA NA NA ...
##   ..$ wxcodes          : chr [1:8818] NA NA NA NA ...
##   ..$ ice_accretion_1hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_3hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ ice_accretion_6hr: logi [1:8818] NA NA NA NA NA NA ...
##   ..$ peak_wind_gust   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_drct   : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ peak_wind_time   : POSIXct[1:8818], format: NA NA ...
##   ..$ feel             : num [1:8818] 29.6 28.4 27.3 26.8 27.9 ...
##   ..$ metar            : chr [1:8818] "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ dirW             : chr [1:8818] "230" "220" "240" "240" ...
##   ..$ spdW             : num [1:8818] 7 9 9 10 8 8 6 8 7 6 ...
##   ..$ gustW            : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##  $ initMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  13 variables:
##   ..$ METAR     : chr [1:8818] "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "53Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "53Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028" "53Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ WindDir   : chr [1:8818] "230" "220" "240" "240" ...
##   ..$ WindSpeed : chr [1:8818] "07" "09" "09" "10" ...
##   ..$ WindGust  : chr [1:8818] NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: chr [1:8818] "10SM" "10SM" "10SM" "10SM" ...
##   ..$ TempC     : chr [1:8818] "02" "02" "02" "02" ...
##   ..$ DewC      : chr [1:8818] "M03" "M03" "M03" "M04" ...
##   ..$ Altimeter : chr [1:8818] "A3017" "A3019" "A3018" "A3018" ...
##   ..$ SLP       : chr [1:8818] "SLP223" "SLP229" "SLP224" "SLP225" ...
##   ..$ FahrC     : chr [1:8818] "T00221033" "T00221028" "T00171028" "T00171039" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8818] "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ convMETAR      :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  15 variables:
##   ..$ METAR     : chr [1:8818] "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "53Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "53Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028" "53Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ WindDir   : chr [1:8818] "230" "220" "240" "240" ...
##   ..$ WindSpeed : int [1:8818] 7 9 9 10 8 8 6 8 7 6 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 2 2 2 2 2 2 1 1 1 1 ...
##   ..$ DewC      : int [1:8818] -3 -3 -3 -4 -4 -4 -4 -4 -4 -4 ...
##   ..$ Altimeter : int [1:8818] 3017 3019 3018 3018 3017 3018 3016 3017 3019 3019 ...
##   ..$ SLP       : int [1:8818] 223 229 224 225 224 225 220 223 228 230 ...
##   ..$ FahrC     : chr [1:8818] "T00221033" "T00221028" "T00171028" "T00171039" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8818] "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ TempF     : num [1:8818] 36 36 35.1 35.1 35.1 ...
##   ..$ DewF      : num [1:8818] 26.1 27 27 25 24.1 ...
##  $ parseMETAR     :Classes 'tbl_df', 'tbl' and 'data.frame': 8818 obs. of  16 variables:
##   ..$ METAR     : chr [1:8818] "53Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "53Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "53Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028" "53Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ WindDir   : chr [1:8818] "230" "220" "240" "240" ...
##   ..$ WindSpeed : int [1:8818] 7 9 9 10 8 8 6 8 7 6 ...
##   ..$ WindGust  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ Dummy     : chr [1:8818] " " " " " " " " ...
##   ..$ Visibility: num [1:8818] 10 10 10 10 10 10 10 10 10 10 ...
##   ..$ TempC     : int [1:8818] 2 2 2 2 2 2 1 1 1 1 ...
##   ..$ DewC      : int [1:8818] -3 -3 -3 -4 -4 -4 -4 -4 -4 -4 ...
##   ..$ Altimeter : int [1:8818] 3017 3019 3018 3018 3017 3018 3016 3017 3019 3019 ...
##   ..$ SLP       : int [1:8818] 223 229 224 225 224 225 220 223 228 230 ...
##   ..$ FahrC     : chr [1:8818] "T00221033" "T00221028" "T00171028" "T00171039" ...
##   ..$ dtime     : POSIXct[1:8818], format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##   ..$ origMETAR : chr [1:8818] "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##   ..$ TempF     : num [1:8818] 36 36 35.1 35.1 35.1 ...
##   ..$ DewF      : num [1:8818] 26.1 27 27 25 24.1 ...
##   ..$ modSLP    : num [1:8818] 1022 1023 1022 1022 1022 ...
##  $ initClouds     :List of 2
##   ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  7 variables:
##   .. ..$ isCLR : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ isVV  : num [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ htVV  : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ numFEW: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numSCT: int [1:8818] 0 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numBKN: int [1:8818] 1 0 0 0 0 0 0 0 0 0 ...
##   .. ..$ numOVC: int [1:8818] 1 1 1 1 1 1 1 1 1 1 ...
##   ..$ mtxCloud : chr [1:8818, 1:11] "" "" "" "" ...
##  $ processedClouds:List of 3
##   ..$ lowCloud       :Classes 'tbl_df', 'tbl' and 'data.frame':  8818 obs. of  5 variables:
##   .. ..$ lowVV : num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowOVC: num [1:8818] 5000 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##   .. ..$ lowBKN: num [1:8818] 2500 NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowSCT: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   .. ..$ lowFEW: num [1:8818] NA NA NA NA NA NA NA NA NA NA ...
##   ..$ minCeilingLevel: num [1:8818] 2500 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...
##   ..$ minCloudLevel  : num [1:8818] 2500 2100 2100 2500 2500 2500 2500 2000 2000 2000 ...

Run a wind comparison for these new observations:

# Run for newly downloaded and processed data files
cpWind <- consolidatePlotWind(files=c("kord2015METAR", "kord2017METAR", 
                                      "kmsp2016METAR", "kdtw2016METAR"
                                      ), 
                              names=c("Chicago, IL (2015)", "Chicago, IL (2017)", 
                                      "Minneapolis, MN (2016)", "Detroit, MI (2016)"
                                      )
                              )

cpWind
## # A tibble: 479 x 5
##    month windDirGroup     n     pct src               
##    <fct> <fct>        <int>   <dbl> <chr>             
##  1 Jan   No Wind         32 0.0417  Chicago, IL (2015)
##  2 Jan   Variable         3 0.00391 Chicago, IL (2015)
##  3 Jan   N               88 0.115   Chicago, IL (2015)
##  4 Jan   NE              63 0.0821  Chicago, IL (2015)
##  5 Jan   E               27 0.0352  Chicago, IL (2015)
##  6 Jan   SE              35 0.0456  Chicago, IL (2015)
##  7 Jan   S               76 0.0991  Chicago, IL (2015)
##  8 Jan   SW             153 0.199   Chicago, IL (2015)
##  9 Jan   W              199 0.259   Chicago, IL (2015)
## 10 Jan   NW              91 0.119   Chicago, IL (2015)
## # ... with 469 more rows

The data can be examined for Chicago, IL 2015:

# Run for Chicago, IL 2015 rainfall
kordRain2015 <- runFullPrecipExtraction(kord2015METAR, 
                                        pType="RA", 
                                        titleText="Chicago, IL Rainfall (hours) in 2015", 
                                        yAxisText="Hours of Rain", 
                                        endExclude=c("2015-02-08 2308", "2015-05-17 1336", 
                                                     "2015-05-11 2020", "2015-06-29 0215",
                                                     "2015-12-28 1700"
                                                     ),
                                        beginExclude=c("2015-01-03 1022", "2015-01-03 1249", 
                                                       "2015-04-02 1125", "2015-06-21 0156",
                                                       "2015-08-15 0209"
                                                       ), 
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2014-12-31" "2014-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 8 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8227
## 2 TRUE          0     0     1
## 3 TRUE          0     1   177
## 4 TRUE          1     0   173
## 5 TRUE          1     1   164
## 6 TRUE          1     2    12
## 7 TRUE          2     1    16
## 8 TRUE          2     2     6
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   13.00   31.00   70.83   88.00  739.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Run for Chicago, IL 2015 snowfall
kordSnow2015 <- runFullPrecipExtraction(kord2015METAR, 
                                        pType="SN", 
                                        titleText="Chicago, IL Snowfall (hours) in 2015", 
                                        yAxisText="Hours of Snow", 
                                        endExclude=c("2015-01-08 1829", "2015-01-08 1845"),
                                        beginExclude=c("2015-01-09 0630"), 
                                        beginAdd=c("2015-02-25 2251", "2015-01-03 1151"),
                                        maxProb=1440, 
                                        sState=FALSE, 
                                        makePlots=TRUE
                                        )
## 
## Regex search code is: (SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2014-12-31" "2014-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8640
## 2 TRUE          0     1    47
## 3 TRUE          1     0    49
## 4 TRUE          1     1    34
## 5 TRUE          1     2     5
## 6 TRUE          3     1     1
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    20.0    52.5   191.8   203.5  2133.0 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 31
## Vector of Begins
##  [1] "2015-01-26 18:51:00 UTC" "2015-01-29 14:17:00 UTC"
##  [3] "2015-01-29 16:22:00 UTC" "2015-01-30 04:00:00 UTC"
##  [5] "2015-01-30 05:58:00 UTC" "2015-02-01 01:38:00 UTC"
##  [7] "2015-02-03 21:26:00 UTC" "2015-02-04 15:13:00 UTC"
##  [9] "2015-02-08 23:08:00 UTC" "2015-02-09 15:00:00 UTC"
## [11] "2015-02-09 17:08:00 UTC"
## 
## Vector of Ends
##  [1] "2015-01-27 07:47:00 UTC" "2015-01-29 15:05:00 UTC"
##  [3] "2015-01-29 16:44:00 UTC" "2015-01-30 05:49:00 UTC"
##  [5] "2015-01-30 06:30:00 UTC" "2015-02-02 13:11:00 UTC"
##  [7] "2015-02-04 00:22:00 UTC" "2015-02-04 19:42:00 UTC"
##  [9] "2015-02-08 23:23:00 UTC" "2015-02-09 16:20:00 UTC"
## [11] "2015-02-09 19:57:00 UTC"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Check for interval consistency in the Chicago, IL 2015 rainfall data
# FZRA on 2015-01-03 (4) and 2015-02-08 (1)
# 2015-05-11 1951 -RA without RAB (1)
# 2015-06-21 0251 -TSRA and RAB0156 but no following RAE (1)
# 2015-06-29 0151 -RA without RAB (1)
# 2015-08-15 best solution to miscoded RAEO6 rather than RAE06 (2)
# FZRA on 2015-12-28 (4)
tmp <- intervalConsistency(kordRain2015, pType="RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8294    0
##       TRUE     14  468
## 
## Mismatch at time 2015-01-03 10:51 UTC
## [1] "KORD 030851Z 13006KT 1 1/2SM -SN BR OVC025 M01/M03 A3010 RMK AO2 SNB00PLE34 SLP200 P0002 60002 T10111033 58009"                                 
## [2] "KORD 030951Z 13005KT 3SM BR FEW013 OVC050 M01/M02 A3005 RMK AO2 SNE50 SLP182 P0002 T10111022"                                                   
## [3] "KORD 031051Z 07003KT 3SM -FZRA OVC006 M01/M02 A3003 RMK AO2 SLP178 FZRAB22SNB0953E22 P0002 I1000 T10061017"                                     
## [4] "KORD 031151Z 13007KT 2 1/2SM -SN BR BKN005 BKN011 OVC049 M01/M02 A2999 RMK AO2 SLP162 P0003 60009 70009 I1001 I6001 T10061017 11006 21017 56037"
## [5] "KORD 031251Z 14007KT 2 1/2SM -FZRA BR OVC005 00/M01 A2996 RMK AO2 FZDZB1155E49FZRAB49SNE1155 SLP152 P0004 I1005 T00001006"                      
## 
## Mismatch at time 2015-01-03 12:51 UTC
## [1] "KORD 031051Z 07003KT 3SM -FZRA OVC006 M01/M02 A3003 RMK AO2 SLP178 FZRAB22SNB0953E22 P0002 I1000 T10061017"                                     
## [2] "KORD 031151Z 13007KT 2 1/2SM -SN BR BKN005 BKN011 OVC049 M01/M02 A2999 RMK AO2 SLP162 P0003 60009 70009 I1001 I6001 T10061017 11006 21017 56037"
## [3] "KORD 031251Z 14007KT 2 1/2SM -FZRA BR OVC005 00/M01 A2996 RMK AO2 FZDZB1155E49FZRAB49SNE1155 SLP152 P0004 I1005 T00001006"                      
## [4] "KORD 031351Z 15006KT 2 1/2SM -FZRA BR OVC006 00/M01 A2994 RMK AO2 SLP146 P0002 I1002 T00001006"                                                 
## [5] "KORD 031451Z 13007KT 2SM -FZRA BR BKN006 OVC040 01/M01 A2991 RMK AO2 SLP135 P0002 60008 I1000 I3007 T00061006 58027"                            
## 
## Mismatch at time 2015-01-03 13:51 UTC
## [1] "KORD 031151Z 13007KT 2 1/2SM -SN BR BKN005 BKN011 OVC049 M01/M02 A2999 RMK AO2 SLP162 P0003 60009 70009 I1001 I6001 T10061017 11006 21017 56037"
## [2] "KORD 031251Z 14007KT 2 1/2SM -FZRA BR OVC005 00/M01 A2996 RMK AO2 FZDZB1155E49FZRAB49SNE1155 SLP152 P0004 I1005 T00001006"                      
## [3] "KORD 031351Z 15006KT 2 1/2SM -FZRA BR OVC006 00/M01 A2994 RMK AO2 SLP146 P0002 I1002 T00001006"                                                 
## [4] "KORD 031451Z 13007KT 2SM -FZRA BR BKN006 OVC040 01/M01 A2991 RMK AO2 SLP135 P0002 60008 I1000 I3007 T00061006 58027"                            
## [5] "KORD 031551Z 16004KT 1 1/2SM -RA BR OVC006 01/00 A2993 RMK AO2 RAB01FZRAE01 SLP141 P0006 I1000 T00060000"                                       
## 
## Mismatch at time 2015-01-03 14:51 UTC
## [1] "KORD 031251Z 14007KT 2 1/2SM -FZRA BR OVC005 00/M01 A2996 RMK AO2 FZDZB1155E49FZRAB49SNE1155 SLP152 P0004 I1005 T00001006"
## [2] "KORD 031351Z 15006KT 2 1/2SM -FZRA BR OVC006 00/M01 A2994 RMK AO2 SLP146 P0002 I1002 T00001006"                           
## [3] "KORD 031451Z 13007KT 2SM -FZRA BR BKN006 OVC040 01/M01 A2991 RMK AO2 SLP135 P0002 60008 I1000 I3007 T00061006 58027"      
## [4] "KORD 031551Z 16004KT 1 1/2SM -RA BR OVC006 01/00 A2993 RMK AO2 RAB01FZRAE01 SLP141 P0006 I1000 T00060000"                 
## [5] "KORD 031651Z 00000KT 2SM R10L/5000VP6000FT -RA BR BKN006 OVC012 01/00 A2990 RMK AO2 SLP132 P0008 T00060000"               
## 
## Mismatch at time 2015-02-08 22:51 UTC
## [1] "KORD 082051Z 02016G21KT 1 3/4SM -DZ BR OVC005 01/M01 A2975 RMK AO2 DZB50 SLP080 P0000 60000 T00061006 53011"                                 
## [2] "KORD 082151Z 01016G21KT 2SM -RA BR OVC004 M01/M01 A2980 RMK AO2 DZE40RAB40 SLP098 P0000 T10061011"                                           
## [3] "KORD 082251Z 01014KT 6SM -FZRA BR OVC006 M01/M02 A2984 RMK AO2 RAE04FZRAB04 SLP110 P0001 I1001 T10061017"                                    
## [4] "KORD 082351Z 01015G22KT 6SM BR OVC005 M01/M02 A2987 RMK AO2 FZRAE08SNB08E23 SLP124 4/008 P0000 60001 I1000 I6001 T10111017 10022 21011 53024"
## [5] "KORD 090051Z 36014KT 7SM OVC006 M02/M03 A2990 RMK AO2 SLP133 T10171028"                                                                      
## 
## Mismatch at time 2015-05-11 19:51 UTC
## [1] "KORD 111751Z 20016G23KT 10SM BKN019 OVC046 19/16 A2979 RMK AO2 PK WND 18027/1704 SLP087 60006 T01940156 10194 20150 58015"
## [2] "KORD 111851Z 21014KT 10SM BKN021 OVC046 20/16 A2978 RMK AO2 PK WND 19026/1753 RAB05E17 SLP084 P0000 T02000156"            
## [3] "KORD 111951Z 24009KT 7SM R10L/5500VP6000FT -RA BKN021 OVC029 18/16 A2977 RMK AO2 SLP079 P0013 T01830161"                  
## [4] "KORD 112051Z 23007KT 10SM FEW009 FEW015 BKN026 18/17 A2976 RMK AO2 RAE20B37E46 SLP078 P0003 60016 T01830167 56009"        
## [5] "KORD 112151Z 25014G24KT 10SM SCT029 BKN035 BKN046 20/14 A2977 RMK AO2 PK WND 26027/2129 SLP080 T02000139 $"               
## 
## Mismatch at time 2015-06-21 02:51 UTC
## [1] "KORD 210051Z 22008KT 10SM -TSRA FEW013 SCT042CB BKN100 OVC250 21/19 A2980 RMK AO2 PK WND 31026/0026 TSB25 SLP087 OCNL LTGICCCCG OHD-NW TS OHD-NW MOV E P0007 T02110189"      
## [2] "KORD 210151Z 32007KT 9SM TS SCT044CB BKN110 BKN200 OVC250 21/19 A2979 RMK AO2 SLP085 RAE28 FRQ LTGICCCCG OHD-W-N TS OHD-W-N MOV E P0002 T02060189"                           
## [3] "KORD 210251Z 02008KT 10SM -TSRA FEW044CB SCT070 SCT110 OVC150 20/18 A2980 RMK AO2 RAB0156 PRESFR SLP088 FRQ LTGICCCCG VC ALQDS TS VC ALQDS MOV E P0015 60024 T02000183 55006"
## [4] "KORD 210551Z 19005KT 10SM SCT038 BKN050 BKN080 OVC130 20/18 A2982 RMK AO2 SLP094 60026 T02000178 10222 20194 402560117 53006"                                                
## [5] "KORD 210651Z 23003KT 10SM FEW020 BKN055 BKN080 OVC170 20/18 A2981 RMK AO2 SLP090 T02000183"                                                                                  
## 
## Mismatch at time 2015-06-29 01:51 UTC
## [1] "KORD 282251Z 19007KT 10SM SCT050 OVC110 24/14 A2983 RMK AO2 SLP098 T02390139"                                                      
## [2] "KORD 282351Z 17007KT 10SM FEW045 BKN090 OVC150 22/15 A2982 RMK AO2 RAB02E27 SLP097 VCSH NE P0000 60000 T02170150 10261 20217 56005"
## [3] "KORD 290151Z 22004KT 10SM -RA FEW060 BKN100 OVC150 20/16 A2982 RMK AO2 SLP095 P0001 T02000156"                                     
## [4] "KORD 290251Z 22005KT 10SM FEW049 SCT065 OVC110 20/16 A2984 RMK AO2 RAE15 SLP101 P0000 60001 T02000156 53006"                       
## [5] "KORD 290351Z 00000KT 10SM -RA FEW050 BKN070 OVC100 19/17 A2983 RMK AO2 RAB05 SLP099 P0001 T01890167"                               
## 
## Mismatch at time 2015-08-15 02:51 UTC
## [1] "KORD 150051Z 25010KT 10SM BKN150 BKN250 29/19 A3005 RMK AO2 SLP171 T02890194"                                                                                                      
## [2] "KORD 150151Z 27010KT 10SM FEW060 BKN150 OVC250 28/21 A3008 RMK AO2 SLP180 OCNL LTGIC DSNT NW-N CB DSNT NW-N T02830211"                                                             
## [3] "KORD 150251Z 35008KT 6SM TSRA SCT060CB BKN100 OVC120 21/18 A3014 RMK AO2 PK WND 32035/0205 WSHFT 0155 RAB09 TSB00 SLP204 OCNL LTGICCG OHD TS OHD MOV S P0008 60008 T02060178 53026"
## [4] "KORD 150351Z 18011KT 10SM -TSRA FEW090CB OVC120 21/17 A3006 RMK AO2 TSE22B27 PRESFR SLP176 OCNL LTGICCC VC S-SW TS VC S-SW MOV S P0012 T02110172"                                  
## [5] "KORD 150451Z 26009G17KT 10SM BKN110 BKN150 21/18 A3010 RMK AO2 SLP189 RAEO6 TSE04 P0001 T02060183"                                                                                 
## 
## Mismatch at time 2015-08-15 03:51 UTC
## [1] "KORD 150151Z 27010KT 10SM FEW060 BKN150 OVC250 28/21 A3008 RMK AO2 SLP180 OCNL LTGIC DSNT NW-N CB DSNT NW-N T02830211"                                                             
## [2] "KORD 150251Z 35008KT 6SM TSRA SCT060CB BKN100 OVC120 21/18 A3014 RMK AO2 PK WND 32035/0205 WSHFT 0155 RAB09 TSB00 SLP204 OCNL LTGICCG OHD TS OHD MOV S P0008 60008 T02060178 53026"
## [3] "KORD 150351Z 18011KT 10SM -TSRA FEW090CB OVC120 21/17 A3006 RMK AO2 TSE22B27 PRESFR SLP176 OCNL LTGICCC VC S-SW TS VC S-SW MOV S P0012 T02110172"                                  
## [4] "KORD 150451Z 26009G17KT 10SM BKN110 BKN150 21/18 A3010 RMK AO2 SLP189 RAEO6 TSE04 P0001 T02060183"                                                                                 
## [5] "KORD 150551Z 00000KT 10SM BKN050 OVC230 20/18 A3011 RMK AO2 SLP190 OCNL LTGIC DSNT N CB DSNT N 60021 T02000183 10294 20194 403220194 55013"                                        
## 
## Mismatch at time 2015-12-28 13:51 UTC
## [1] "KORD 281151Z 07021G27KT 10SM OVC020 01/M04 A3023 RMK AO2 PK WND 07027/1148 SLP244 70004 T00061039 10017 20006 56021"                                      
## [2] "KORD 281251Z 07018G24KT 9SM -PLRA OVC022 00/M03 A3019 RMK AO2 PK WND 06028/1219 SLP230 PLB06RAB49 P0001 T00001033"                                        
## [3] "KORD 281351Z 07015G26KT 2 1/2SM -FZRAPL BR FEW013 BKN017 OVC070 M01/M02 A3015 RMK AO2 PK WND 07030/1317 RAE12FZRAB44SNB15E41 SLP217 P0003 I1000 T10061022"
## [4] "KORD 281451Z 06023G32KT 4SM -FZRAPL BR BKN013 OVC018 M01/M03 A3010 RMK AO2 PK WND 06032/1448 SLP199 P0003 60007 I1001 I3001 T10061028 58044"              
## [5] "KORD 281551Z 07024G33KT 4SM -FZRAPL BR OVC014 M01/M03 A3009 RMK AO2 PK WND 07033/1550 SLP196 P0001 I1000 T10061028"                                       
## 
## Mismatch at time 2015-12-28 14:51 UTC
## [1] "KORD 281251Z 07018G24KT 9SM -PLRA OVC022 00/M03 A3019 RMK AO2 PK WND 06028/1219 SLP230 PLB06RAB49 P0001 T00001033"                                        
## [2] "KORD 281351Z 07015G26KT 2 1/2SM -FZRAPL BR FEW013 BKN017 OVC070 M01/M02 A3015 RMK AO2 PK WND 07030/1317 RAE12FZRAB44SNB15E41 SLP217 P0003 I1000 T10061022"
## [3] "KORD 281451Z 06023G32KT 4SM -FZRAPL BR BKN013 OVC018 M01/M03 A3010 RMK AO2 PK WND 06032/1448 SLP199 P0003 60007 I1001 I3001 T10061028 58044"              
## [4] "KORD 281551Z 07024G33KT 4SM -FZRAPL BR OVC014 M01/M03 A3009 RMK AO2 PK WND 07033/1550 SLP196 P0001 I1000 T10061028"                                       
## [5] "KORD 281651Z 07017G32KT 2SM FZRAPL BR SCT013 OVC018 M01/M03 A3003 RMK AO2 PK WND 07033/1637 SLP176 P0004 I1000 T10061028"                                 
## 
## Mismatch at time 2015-12-28 15:51 UTC
## [1] "KORD 281351Z 07015G26KT 2 1/2SM -FZRAPL BR FEW013 BKN017 OVC070 M01/M02 A3015 RMK AO2 PK WND 07030/1317 RAE12FZRAB44SNB15E41 SLP217 P0003 I1000 T10061022"    
## [2] "KORD 281451Z 06023G32KT 4SM -FZRAPL BR BKN013 OVC018 M01/M03 A3010 RMK AO2 PK WND 06032/1448 SLP199 P0003 60007 I1001 I3001 T10061028 58044"                  
## [3] "KORD 281551Z 07024G33KT 4SM -FZRAPL BR OVC014 M01/M03 A3009 RMK AO2 PK WND 07033/1550 SLP196 P0001 I1000 T10061028"                                           
## [4] "KORD 281651Z 07017G32KT 2SM FZRAPL BR SCT013 OVC018 M01/M03 A3003 RMK AO2 PK WND 07033/1637 SLP176 P0004 I1000 T10061028"                                     
## [5] "KORD 281751Z 06019G29KT 1 3/4SM PL BR BKN010 OVC015 00/M02 A2998 RMK AO2 PK WND 06033/1723 FZRAE00 SLP158 P0009 60021 I1000 I6001 T00001017 10006 21006 58040"
## 
## Mismatch at time 2015-12-28 16:51 UTC
## [1] "KORD 281451Z 06023G32KT 4SM -FZRAPL BR BKN013 OVC018 M01/M03 A3010 RMK AO2 PK WND 06032/1448 SLP199 P0003 60007 I1001 I3001 T10061028 58044"                  
## [2] "KORD 281551Z 07024G33KT 4SM -FZRAPL BR OVC014 M01/M03 A3009 RMK AO2 PK WND 07033/1550 SLP196 P0001 I1000 T10061028"                                           
## [3] "KORD 281651Z 07017G32KT 2SM FZRAPL BR SCT013 OVC018 M01/M03 A3003 RMK AO2 PK WND 07033/1637 SLP176 P0004 I1000 T10061028"                                     
## [4] "KORD 281751Z 06019G29KT 1 3/4SM PL BR BKN010 OVC015 00/M02 A2998 RMK AO2 PK WND 06033/1723 FZRAE00 SLP158 P0009 60021 I1000 I6001 T00001017 10006 21006 58040"
## [5] "KORD 281851Z 06022G29KT 1 1/2SM PL BR OVC008 00/M01 A2988 RMK AO2 PK WND 06034/1815 PRESFR SLP123 P0015 I1000 T00001011"
# Check for interval consistency in the Chicago, IL 2015 snowfall data
# 2015-01-09 0651 BLSN but thinks it is SN (interval OK)
tmp <- intervalConsistency(kordSnow2015, pType="SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8496    0
##       TRUE      1  279
## 
## Mismatch at time 2015-01-09 06:51 UTC
## [1] "KORD 090451Z 28013G22KT 1 1/2SM R10L/5500VP6000FT -SN BLSN FEW020 BKN024 OVC035 M09/M13 A2986 RMK AO2 PK WND 28027/0356 SLP127 P0000 T10941128"                                               
## [2] "KORD 090551Z 31024G34KT 2SM R10L/5000VP6000FT -SN BLSN BKN026 BKN033 OVC041 M11/M16 A2991 RMK AO2 PK WND 30034/0551 TWR VIS 5 PRESRR SLP145 P0000 60002 T11111156 11078 21117 410781222 53048"
## [3] "KORD 090651Z 30017KT 6SM BLSN SCT012 M13/M18 A2998 RMK AO2 PK WND 33033/0635 SNE16B30E37 SLP168 P0000 T11281183"                                                                              
## [4] "KORD 090751Z 31014KT 10SM FEW020 M13/M18 A3001 RMK AO2 SLP178 PRESENT WX DRSN T11281183"                                                                                                      
## [5] "KORD 090851Z 29011KT 10SM FEW032 M13/M18 A3005 RMK AO2 SLP189 PRESENT WX DRSN 60000 T11281178 51045"
# Original pass for Chicago, IL
x7 <- findPrecipTypes(kordRain2015)
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  6472  2304 
## 
## 
## Multiple Precipitation types in the same record include
## .
##        -RA BR        -SN BR        -DZ BR         RA BR        BR CLR 
##           138           101            82            44            23 
##      -SN BLSN      -FZDZ BR       SN FZFG      -TSRA BR        +RA BR 
##            16            12            11            10             8 
##        -DZ FG       TSRA BR   -SN BLSN BR  SN BLSN FZFG      -FZRA BR 
##             7             7             6             6             5 
##      +SN FZFG      +TSRA BR    -FZRAPL BR         SN FG        -RA FG 
##             5             5             4             4             3 
## +SN BLSN FZFG    -FZDZSN BR      -RAPL BR      -SN FZFG         PL BR 
##             3             2             2             2             2 
##       RAPL BR      -PLSN BR      +SN BLSN      +TSRA FG     FZRAPL BR 
##             2             1             1             1             1 
## RVRNO TSRA BR       TSRA FG 
##             1             1 
## 
## Single Precipitation types in the same record include
## .
##     CLR      BR     -RA     -SN      HZ      FG     -DZ   -TSRA      TS      FU 
##     928     365     220     118      38      35      20      17      13       6 
##   -FZDZ    FZFG      RA    TSRA     +RA -FZDZSN   -FZRA     -PL   -PLRA   -PLSN 
##       5       5       4       4       2       1       1       1       1       1 
##   +TSRA    BLSN      SN 
##       1       1       1
# Exclude CLR as it is a cloud type, not precipitation
x8 <- findPrecipTypes(kordRain2015, exclTypes=c("CLR"))
## 
## Precipitation data status by METAR record
## 
## pExists
## FALSE  TRUE 
##  7400  1376 
## 
## 
## Multiple Precipitation types in the same record include
## .
##        -RA BR        -SN BR        -DZ BR         RA BR      -SN BLSN 
##           138           101            82            44            16 
##      -FZDZ BR       SN FZFG      -TSRA BR        +RA BR        -DZ FG 
##            12            11            10             8             7 
##       TSRA BR   -SN BLSN BR  SN BLSN FZFG      -FZRA BR      +SN FZFG 
##             7             6             6             5             5 
##      +TSRA BR    -FZRAPL BR         SN FG        -RA FG +SN BLSN FZFG 
##             5             4             4             3             3 
##    -FZDZSN BR      -RAPL BR      -SN FZFG         PL BR       RAPL BR 
##             2             2             2             2             2 
##      -PLSN BR      +SN BLSN      +TSRA FG     FZRAPL BR RVRNO TSRA BR 
##             1             1             1             1             1 
##       TSRA FG 
##             1 
## 
## Single Precipitation types in the same record include
## .
##      BR     -RA     -SN      HZ      FG     -DZ   -TSRA      TS      FU   -FZDZ 
##     388     220     118      38      35      20      17      13       6       5 
##    FZFG      RA    TSRA     +RA -FZDZSN   -FZRA     -PL   -PLRA   -PLSN   +TSRA 
##       5       4       4       2       1       1       1       1       1       1 
##    BLSN      SN 
##       1       1
# Get the Chicago, IL 2015 liquid precipitation
kord2015Precip <- extractLiquidPrecipAmounts(kordRain2015)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8152  158  101   85   42   39   19   19   22   10    9   16   12    9    7    8 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.31 0.33 
##    6    4    3    4    5    5    2    3    2    3    3    1    2    4    2    2 
## 0.34 0.35 0.37 0.39  0.4 0.43 0.44 0.49 0.58 0.66 0.71 0.77 0.81 0.88 
##    1    2    1    1    1    2    1    1    1    1    1    1    1    2 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8381   51   44   28   20   18   20   19   11   16    7    6   16    6    8    4 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.31 0.32 
##    6    6    4    5    4    6    2    7    6    3    6    6    2    6    1    3 
## 0.33 0.34 0.35 0.36 0.37 0.39  0.4 0.41 0.42 0.43 0.44 0.46 0.49 0.53 0.54 0.58 
##    2    2    2    2    3    2    3    2    1    1    2    1    3    1    2    2 
## 0.59 0.61 0.62 0.63 0.69 0.71 0.84 0.89 0.93 0.98 1.12 1.16 1.24 1.29  1.3 1.64 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 2.04 
##    1 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8644   10    9    9    9    1    2    4    2    5    1    1    6    3    3    1 
## 0.16 0.17 0.19 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.29 0.31 0.32 0.35 0.36 0.37 
##    4    1    2    1    2    1    1    3    3    2    3    3    1    1    2    1 
## 0.39  0.4 0.41 0.42 0.43 0.44 0.46 0.47 0.49  0.5 0.53 0.54 0.55 0.57 0.65 0.68 
##    2    1    1    2    1    1    1    1    2    1    1    1    1    1    1    2 
## 0.73  0.8 0.93 0.97 0.98 1.01 1.02  1.1 1.17 1.37 1.65 2.56 
##    1    2    1    2    2    1    1    1    1    1    1    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   2.10     10.90    0.00  10.90   0.00
## 2      1   1.85      0.00    0.00   0.00   0.00
## 3      2   1.49      0.00    0.00   0.00   0.00
## 4      3   2.47      5.91    0.00   0.00   5.91
## 5      4   1.87      0.00    0.00   0.00   0.00
## 6      5   1.68      0.00    0.00   0.00   0.00
## 7      6   1.65     11.01    0.00  11.01   0.00
## 8      7   1.48      0.00    0.00   0.00   0.00
## 9      8   1.82      0.00    0.00   0.00   0.00
## 10     9   1.55      4.87    0.00   0.00   4.87
## 11    10   2.51      0.00    0.00   0.00   0.00
## 12    11   1.40      0.00    0.00   0.00   0.00
## 13    12   1.83     10.21   38.75  10.21   0.00
## 14    13   1.62      0.00    0.00   0.00   0.00
## 15    14   2.82      0.00    0.00   0.00   0.00
## 16    15   0.82      4.27    0.00   0.00   4.27
## 17    16   1.31      0.00    0.00   0.00   0.00
## 18    17   1.73      0.00    0.00   0.00   0.00
## 19    18   2.31     10.09    0.00  10.09   0.00
## 20    19   1.35      0.00    0.00   0.00   0.00
## 21    20   1.37      0.00    0.00   0.00   0.00
## 22    21   1.64      4.38    0.00   0.00   4.38
## 23    22   2.53      0.00    0.00   0.00   0.00
## 24    23   1.84      0.00    0.00   0.00   0.00
# Check for consistency in the Chicago, IL 2015 precipitation data
checkPrecipConsistency(kord2015Precip, title="Chicago, IL 2015 Precipitation by Month", yearsUse=2015)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2014    12    30    12   0       0     0    
## 2  2014    12    31    24   0       0     0    
## 3  2015     1     1    24   0       0     0    
## 4  2015     1     2    24   0.09    0.09  0.09 
## 5  2015     1     3    24   0.570   0.57  0.570
## 6  2015     1     4    24   0.04    0.04  0.04 
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2015-01    1.64   1.64   1.64
##  2 2015-02    1.2    3.4    4.8 
##  3 2015-03    1.13   1.19   1.17
##  4 2015-04    2.87   2.87   2.9 
##  5 2015-05    4.66   4.66   4.66
##  6 2015-06    7.12   7.12   7.1 
##  7 2015-07    2.85   2.85   2.85
##  8 2015-08    2.16   2.16   2.16
##  9 2015-09    4.64   4.64   4.64
## 10 2015-10    2.57   2.48   1.97
## 11 2015-11    4.67   4.67   4.67
## 12 2015-12    3.24   4.47   4.42
## p24Hour  p6Hour  p1Hour 
##   38.75   42.15   42.98

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.02 0.03 0.06 0.08 0.09 0.12 0.26 0.37 0.66 0.88 1.26 2.72 
##  344    4    7    1    2    1    1    1    1    1    2    1    1    1 
## 
## 
## Mismatch days of worse than maxDelta inches include
##    month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1      2   1    0.65   2.84   3.37      3.37      0.65  2.72
## 2      2   2    0.00   0.00   0.88      0.88      0.00  0.88
## 3      3  23    0.37   0.37   0.35      0.37      0.35  0.02
## 4      3  30    0.00   0.09   0.09      0.09      0.00  0.09
## 5      4   2    0.25   0.25   0.28      0.28      0.25  0.03
## 6      6  20    0.31   0.31   0.29      0.31      0.29  0.02
## 7     10  20    0.12   0.38   0.28      0.38      0.12  0.26
## 8     10  21    0.00   0.08   0.02      0.08      0.00  0.08
## 9     10  23    0.39   0.39   0.37      0.39      0.37  0.02
## 10    10  24    0.41   0.29   0.29      0.41      0.29  0.12
## 11    10  27    0.68   0.36   0.31      0.68      0.31  0.37
## 12    10  28    0.80   0.14   0.49      0.80      0.14  0.66
## 13    10  30    0.03   0.03   0.05      0.05      0.03  0.02
## 14    10  31    0.08   0.74   0.08      0.74      0.08  0.66
## 15    12   2    0.12   0.12   0.10      0.12      0.10  0.02
## 16    12  14    0.12   0.12   0.06      0.12      0.06  0.06
## 17    12  28    0.00   1.25   1.26      1.26      0.00  1.26
## 18     1   1    0.00   0.06   0.06      0.06      0.00  0.06
# Check for missing data and sensor anomalies
checkGapsAnomalies(kordRain2015, minDay="2015-01-01", maxDay="2015-12-31", loc="Chicago, IL (2015)")
## 
## Data file with new time and anomaly variable
## [1] 8776    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8728    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly      
##  Length:365         Min.   : 1.00   Min.   :18.00   Min.   : 0.000  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.000  
##  Mode  :character   Median :16.00   Median :24.00   Median : 0.000  
##                     Mean   :15.72   Mean   :23.91   Mean   : 1.411  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.: 0.000  
##                     Max.   :31.00   Max.   :24.00   Max.   :24.000  
##     missObs       
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.08767  
##  3rd Qu.:0.00000  
##  Max.   :6.00000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2015-01           1       1           6      49    31
##  2 2015-02           1       1           9      61    28
##  3 2015-03           3       3           4      43    31
##  4 2015-04           1       1           3      16    30
##  5 2015-05           1       1           5      81    31
##  6 2015-06           4      10           4       4    30
##  7 2015-07           0       0           0       0    31
##  8 2015-08           1       2           1       1    31
##  9 2015-09           1       1           4      21    30
## 10 2015-10           2      11          12      86    31
## 11 2015-11           0       0          12     142    30
## 12 2015-12           1       1           5      11    31

There appears to be a large issue on February 1-2, 2015 where precipitation data are off by several inches. In addition to a few small one-off, there is also an extended issue from October 20-31, 2015 as well as a missing 24-hour precipitation value (1.25 inches) from December 28, 2015.

Example #36: Automating Suggestiong for Precipitation Begin and End

At each time period in a METAR, there is a precipitation state that is either “on” or “off”. The state can be checked for consistency against the begin and end data as follows. The general assumption is that recording will be more accuract for “is there precipitation now” then for all of the possible begins and ends for a specified precipitation type:

  • If the previous state is “on”, there should not be a “begin” prior to the next “end” - flag these as potential beginExclude items
  • If the previous state is “on” and the current state is “on”, then there should not be any “end” items without an associated “begin” item following - flag these as potential endExclude items
  • If the previous state is “on” and the current state is “off”, then there should be an “end” event in the remarks data - flag these as potential endAdd items
  • If the previous state is “off”, there should not be an “end” event prior to the next “begin” - flag these as potential endExclude items
  • If the previous state is “off” and the current state is “off”, then there should not be any “begin” items without an associated “end” item following - flag these as potential beginExclude items
  • If the previous state is “off” and the current state is “on”, then there should be a “begin” event in the remarks data - flag these as potential beginAdd items

Example code includes:

# Look for RA that is not preceded by FZ in the 2016 Chicago, IL data
regMatch <- "(?<!FZ)RA"

# Pull the Chicago, IL 2016 data and check for the specified precipitation pattern and lags
kordStates <- kordRain2016$testFileProc %>%
    select(dtime, origMETAR) %>%
    mutate(curPrecip=str_detect(origMETAR, paste0(".*", regMatch, ".*RMK")), 
           lagPrecip=lag(curPrecip, 1)
           )

# Use the analysis data to look for begins and ends flagged in the remarks
kordBE <- extractPrecipData(list(fullMETAR=kordStates), pType=regMatch)
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8261
## 2 TRUE          0     1   177
## 3 TRUE          1     0   176
## 4 TRUE          1     1   166
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
# Inner join the data by dtime
kordStates <- kordStates %>%
    inner_join(kordBE %>% select(dtime, precipData, chgPrecip=isPrecip, dateUTC, hourUTC), by="dtime")
kordStates
## # A tibble: 8,815 x 8
##    dtime               origMETAR curPrecip lagPrecip precipData chgPrecip
##    <dttm>              <chr>     <lgl>     <lgl>     <chr>      <lgl>    
##  1 2015-12-31 00:51:00 KORD 310~ FALSE     NA        <NA>       FALSE    
##  2 2015-12-31 01:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
##  3 2015-12-31 02:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
##  4 2015-12-31 03:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
##  5 2015-12-31 04:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
##  6 2015-12-31 05:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
##  7 2015-12-31 06:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
##  8 2015-12-31 07:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
##  9 2015-12-31 08:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
## 10 2015-12-31 09:51:00 KORD 310~ FALSE     FALSE     <NA>       FALSE    
## # ... with 8,805 more rows, and 2 more variables: dateUTC <date>, hourUTC <int>
# Get the beginning and end times data for the desired precipitation type
kordBegin <- getBeginEndTimeMatrix(kordStates, pState="B")
kordEnd <- getBeginEndTimeMatrix(kordStates, pState="E")

# Hard-code for 2-column files (relax later)
# Extract the begin and end times
if (ncol(kordBegin) != 2 | ncol(kordEnd) != 2) { stop("Hard-coded for 2 columns, fix") }
testBT1 <- getBeginEndTimeVector(kordBegin, kordStates, extractVar="V1", extractSym="B")
testET1 <- getBeginEndTimeVector(kordEnd, kordStates, extractVar="V1", extractSym="E")
testBT2 <- getBeginEndTimeVector(kordBegin, kordStates, extractVar="V2", extractSym="B")
testET2 <- getBeginEndTimeVector(kordEnd, kordStates, extractVar="V2", extractSym="E")

# Integrate to a single file
kordExceptions <- kordStates %>%
    mutate(b1=testBT1, e1=testET1, b2=testBT2, e2=testET2, 
           begins=ifelse(is.na(b1), 0, 1) + ifelse(is.na(b2), 0, 1), 
           ends=ifelse(is.na(e1), 0, 1) + ifelse(is.na(e2), 0, 1),
           etob=begins > ends, 
           btoe=ends > begins, 
           needBegin=curPrecip & !lagPrecip & !etob, 
           needEnd=!curPrecip & lagPrecip & !btoe, 
           overBegin=etob & (lagPrecip | !curPrecip), 
           overEnd=btoe & (curPrecip | !lagPrecip)
           )

# Flag potential issues
cat("\nNeed Begin time\n")
## 
## Need Begin time
kordExceptions %>%
    filter(needBegin) %>%
    select(dtime, origMETAR)
## # A tibble: 2 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-03-24 21:51:00 KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00~
## 2 2016-08-29 19:51:00 KORD 291951Z 25009KT 1SM R10L/1200VP6000FT +TSRA BKN041CB~
cat("\nNeed End time\n")
## 
## Need End time
kordExceptions %>%
    filter(needEnd) %>%
    select(dtime, origMETAR)
## # A tibble: 1 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-07-13 18:51:00 KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A~
cat("\nExtraneous Begin time\n")
## 
## Extraneous Begin time
kordExceptions %>%
    filter(overBegin) %>%
    select(dtime, origMETAR)
## # A tibble: 0 x 2
## # ... with 2 variables: dtime <dttm>, origMETAR <chr>
cat("\nExtraneous End time\n")
## 
## Extraneous End time
kordExceptions %>%
    filter(overEnd) %>%
    select(dtime, origMETAR)
## # A tibble: 0 x 2
## # ... with 2 variables: dtime <dttm>, origMETAR <chr>
# Attempt to use on the kord2016 data - initial run
kordRain2016Test <- runFullPrecipExtraction(kord2016METAR, 
                                            pType=regMatch, 
                                            titleText="Chicago, IL Rainfall (hours) in 2016", 
                                            yAxisText="Hours of Rain", 
                                            endExclude=c(),
                                            beginExclude=c(),
                                            endAdd=c(), 
                                            beginAdd=c(),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=FALSE
                                            )
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8261
## 2 TRUE          0     1   177
## 3 TRUE          1     0   176
## 4 TRUE          1     1   166
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE
## Warning in unclass(time1) - unclass(time2): longer object length is not a
## multiple of shorter object length
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -30696.0   -254.5    -43.0    518.9     -2.0 509428.0 
## 
## Problem Detected - Intervals are not positive.  Data to help investigate
## 
## Vector of Begins
##  [1] "2016-03-24 03:38:00 UTC" "2016-03-24 05:49:00 UTC"
##  [3] "2016-03-24 07:38:00 UTC" "2016-03-24 11:32:00 UTC"
##  [5] "2016-03-24 14:41:00 UTC" "2016-03-27 19:59:00 UTC"
##  [7] "2016-03-30 13:42:00 UTC" "2016-03-30 16:34:00 UTC"
##  [9] "2016-03-30 21:13:00 UTC" "2016-03-30 23:27:00 UTC"
## [11] "2016-03-31 01:09:00 UTC"
## 
## Vector of Ends
##  [1] "2016-03-24 05:09:00 UTC" "2016-03-24 05:51:00 UTC"
##  [3] "2016-03-24 08:20:00 UTC" "2016-03-24 14:01:00 UTC"
##  [5] "2016-03-24 18:41:00 UTC" "2016-03-24 22:14:00 UTC"
##  [7] "2016-03-28 00:49:00 UTC" "2016-03-30 13:50:00 UTC"
##  [9] "2016-03-30 16:59:00 UTC" "2016-03-30 21:25:00 UTC"
## [11] "2016-03-31 00:20:00 UTC"
## 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 399
## Vector of Begins
## [1] "2016-12-25 22:11:00 UTC" "2016-12-26 04:56:00 UTC"
## [3] "2016-12-26 08:23:00 UTC" "2016-12-26 08:57:00 UTC"
## [5] "2016-12-26 12:46:00 UTC"
## 
## Vector of Ends
## [1] "2016-12-24 03:43:00 UTC" "2016-12-25 23:40:00 UTC"
## [3] "2016-12-26 05:20:00 UTC" "2016-12-26 08:37:00 UTC"
## [5] "2016-12-26 12:23:00 UTC"
# Attempt to use on the kord2016 data - with adds and excludes
kordRain2016Test <- runFullPrecipExtraction(kord2016METAR, 
                                            pType=regMatch, 
                                            titleText="Chicago, IL Rainfall (hours) in 2016", 
                                            yAxisText="Hours of Rain", 
                                            endExclude=c(),
                                            beginExclude=c(),
                                            endAdd=c("2016-07-13 1851"), 
                                            beginAdd=c("2016-03-24 2151", "2016-08-29 1951"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8261
## 2 TRUE          0     1   177
## 3 TRUE          1     0   176
## 4 TRUE          1     1   166
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   14.00   33.50   70.51   83.50 1243.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kordRain2016Test, pType=regMatch)
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8349    0
##       TRUE      0  466
## 
## Full matches between METAR observations and intervals

Example #37: Function for Automating Suggestions for Precipitation Begin and End

The precipitation suggestions can be converted to a function, then applied to several of the other datasets.
Example code includes:

suggestBeginEndTimes <- function(lst, regMatch, listExtract="fullMETAR") {

    # Pull the data and check for the specified precipitation pattern and lags
    sugStates <- lst[[listExtract]] %>%
        select(dtime, origMETAR) %>%
        mutate(curPrecip=str_detect(origMETAR, paste0(".*", regMatch, ".*RMK")), 
               lagPrecip=lag(curPrecip, 1)
               )

    # Use the analysis data to look for begins and ends flagged in the remarks
    sugBE <- extractPrecipData(list(fullMETAR=sugStates), pType=regMatch)

    # Inner join the data by dtime
    sugStates <- sugStates %>%
        inner_join(sugBE %>% select(dtime, precipData, chgPrecip=isPrecip, dateUTC, hourUTC), by="dtime")

    # Get the beginning and end times data for the desired precipitation type
    sugBegin <- getBeginEndTimeMatrix(sugStates, pState="B")
    sugEnd <- getBeginEndTimeMatrix(sugStates, pState="E")

    # Hard-code for 2-column files (relax later)
    # Extract the begin and end times
    if (ncol(sugBegin) > 3 | ncol(sugEnd) > 3) { stop("Hard-coded for 0-3 columns, fix") }
    testBT1 <- NA
    testBT2 <- NA
    testBT3 <- NA
    testET1 <- NA
    testET2 <- NA
    testET3 <- NA
    
    if (ncol(sugBegin) >= 1) {
        testBT1 <- getBeginEndTimeVector(sugBegin, sugStates, extractVar="V1", extractSym="B")
    }
    if (ncol(sugBegin) >= 2) {
        testBT2 <- getBeginEndTimeVector(sugBegin, sugStates, extractVar="V2", extractSym="B")
    }
    if (ncol(sugBegin) >= 3) {
        testBT3 <- getBeginEndTimeVector(sugBegin, sugStates, extractVar="V3", extractSym="B")
    }

    if (ncol(sugEnd) >= 1) {
        testET1 <- getBeginEndTimeVector(sugEnd, sugStates, extractVar="V1", extractSym="E")
    }
    if (ncol(sugEnd) >= 2) {
        testET2 <- getBeginEndTimeVector(sugEnd, sugStates, extractVar="V2", extractSym="E")
    }
    if (ncol(sugEnd) >= 3) {
        testET3 <- getBeginEndTimeVector(sugEnd, sugStates, extractVar="V3", extractSym="E")
    }
    
    # Integrate to a single file
    sugExceptions <- sugStates %>%
        mutate(b1=testBT1, e1=testET1, b2=testBT2, e2=testET2, b3=testBT3, e3=testET3,
               begins=ifelse(is.na(b1), 0, 1) + ifelse(is.na(b2), 0, 1) + ifelse(is.na(b3), 0, 1), 
               ends=ifelse(is.na(e1), 0, 1) + ifelse(is.na(e2), 0, 1) + ifelse(is.na(e3), 0, 1),
               etob=begins > ends, 
               btoe=ends > begins, 
               needBegin=curPrecip & !lagPrecip & !etob, 
               needEnd=!curPrecip & lagPrecip & !btoe, 
               overBegin=etob & (lagPrecip | !curPrecip), 
               overEnd=btoe & (curPrecip | !lagPrecip)
               )
    colSums(is.na(sugExceptions)) %>% print()

    # Flag potential issues
    cat("\nNeed Begin time\n")
    sugExceptions %>%
        filter(needBegin) %>%
        select(dtime, origMETAR) %>%
        print()

    cat("\nNeed End time\n")
    sugExceptions %>%
        filter(needEnd) %>%
        select(dtime, origMETAR) %>%
        print()

    cat("\nExtraneous Begin time\n")
    sugExceptions %>%
        filter(overBegin) %>%
        select(dtime, b1, b2, b3) %>%
        print()

    cat("\nExtraneous End time\n")
    sugExceptions %>%
        filter(overEnd) %>%
        select(dtime, e1, e2, e3) %>%
        print()

    cat("\nWrong amount of begins or ends\n")
    sugExceptions %>%
        mutate(absMatch=abs(begins-ends), absLag=lag(absMatch, 1), absLead=lead(absMatch, 1)) %>%
        filter(pmax(absMatch, absLag, absLead) > 1) %>%
        select(dtime, e1, e2, e3, b1, b2, b3) %>%
        print()
    
    sugExceptions
    
}


# Look for RA that is not preceded by FZ in the 2016 Chicago, IL data
kord2016ExceptRA <- suggestBeginEndTimes(kord2016METAR, regMatch="(?<!FZ)RA")
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8261
## 2 TRUE          0     1   177
## 3 TRUE          1     0   176
## 4 TRUE          1     1   166
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8261          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8438       8437       8794       8794       8815       8815 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 2 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-03-24 21:51:00 KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00~
## 2 2016-08-29 19:51:00 KORD 291951Z 25009KT 1SM R10L/1200VP6000FT +TSRA BKN041CB~
## 
## Need End time
## # A tibble: 1 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-07-13 18:51:00 KORD 131851Z 22009G18KT 10SM FEW027 SCT047 BKN250 28/22 A~
## 
## Extraneous Begin time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, b1 <chr>, b2 <chr>, b3 <lgl>
## 
## Extraneous End time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kord2016 rain data
kordRain2016Test <- runFullPrecipExtraction(kord2016METAR, 
                                            pType="(?<!FZ)RA", 
                                            titleText="Chicago, IL Rainfall (hours) in 2016", 
                                            yAxisText="Hours of Rain", 
                                            endExclude=c(),
                                            beginExclude=c(),
                                            endAdd=c("2016-07-13 1851"), 
                                            beginAdd=c("2016-03-24 2151", "2016-08-29 1951"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8261
## 2 TRUE          0     1   177
## 3 TRUE          1     0   176
## 4 TRUE          1     1   166
## 5 TRUE          1     2    14
## 6 TRUE          2     1    14
## 7 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   14.00   33.50   70.51   83.50 1243.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kordRain2016Test, pType="(?<!FZ)RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8349    0
##       TRUE      0  466
## 
## Full matches between METAR observations and intervals
# Look for SN that is not preceded by BL in the 2016 Chicago, IL data
kord2016ExceptSN <- suggestBeginEndTimes(kord2016METAR, regMatch="(?<!BL)SN")
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8630
## 2 TRUE          0     1    69
## 3 TRUE          1     0    62
## 4 TRUE          1     1    44
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8630          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8699       8692       8809       8811       8815       8815 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 4 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-02-10 00:51:00 KORD 100051Z 31012KT 4SM -SN FEW024 OVC040 M09/M13 A2993 ~
## 2 2016-03-24 21:51:00 KORD 242151Z 34014KT 1 3/4SM -RASN BR SCT010 OVC016 01/00~
## 3 2016-04-08 22:51:00 KORD 082251Z 31014G31KT 9SM -SN SCT045 BKN050 OVC080 02/M~
## 4 2016-12-24 01:51:00 KORD 240151Z 17011KT 4SM -SN BR SCT015 OVC039 01/M01 A299~
## 
## Need End time
## # A tibble: 0 x 2
## # ... with 2 variables: dtime <dttm>, origMETAR <chr>
## 
## Extraneous Begin time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, b1 <chr>, b2 <chr>, b3 <lgl>
## 
## Extraneous End time
## # A tibble: 1 x 4
##   dtime               e1              e2    e3   
##   <dttm>              <chr>           <chr> <lgl>
## 1 2016-04-09 04:51:00 2016-04-09 0446 <NA>  NA   
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kord2016 snow data
kordSnow2016Test <- runFullPrecipExtraction(kord2016METAR, 
                                            pType="(?<!BL)SN", 
                                            titleText="Chicago, IL Snowfall (hours) in 2016", 
                                            yAxisText="Hours of Snow", 
                                            endExclude=c("2016-04-09 0446"),
                                            beginExclude=c(),
                                            endAdd=c(), 
                                            beginAdd=c("2016-02-10 0051", "2016-03-24 2151", 
                                                       "2016-04-08 2251", "2016-12-24 0151"
                                                       ),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8815 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 26008KT 8SM OVC016 M03/M06 A3021 RMK AO2 SLP239 T10281056" "KORD 310151Z 26008KT 9SM OVC016 M03/M06 A3021 RMK AO2 SLP240 T10281056" "KORD 310251Z 25008KT 10SM OVC015 M03/M06 A3021 RMK AO2 SLP239 T10281061 50004" "KORD 310351Z 24009KT 10SM OVC019 M03/M06 A3022 RMK AO2 SLP242 T10281061" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:51:00" "2015-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8630
## 2 TRUE          0     1    69
## 3 TRUE          1     0    62
## 4 TRUE          1     1    44
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   20.25   47.50  134.12  140.75 1016.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kordSnow2016Test, pType="(?<!BL)SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8532    0
##       TRUE      0  283
## 
## Full matches between METAR observations and intervals

This algorithm can then be used to process the Chicago, IL 2015 and 2017 data:

# Look for RA that is not preceded by FZ in the 2015 Chicago, IL data
kord2015ExceptRA <- suggestBeginEndTimes(kord2015METAR, regMatch="(?<!FZ)RA")
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2014-12-31" "2014-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 8 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8234
## 2 TRUE          0     0     1
## 3 TRUE          0     1   174
## 4 TRUE          1     0   170
## 5 TRUE          1     1   163
## 6 TRUE          1     2    12
## 7 TRUE          2     1    16
## 8 TRUE          2     2     6
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8235          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8409       8405       8754       8758       8776       8776 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 2 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2015-05-11 19:51:00 KORD 111951Z 24009KT 7SM R10L/5500VP6000FT -RA BKN021 OVC~
## 2 2015-06-29 01:51:00 KORD 290151Z 22004KT 10SM -RA FEW060 BKN100 OVC150 20/16 ~
## 
## Need End time
## # A tibble: 2 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2015-06-21 05:51:00 KORD 210551Z 19005KT 10SM SCT038 BKN050 BKN080 OVC130 20/~
## 2 2015-08-15 04:51:00 KORD 150451Z 26009G17KT 10SM BKN110 BKN150 21/18 A3010 RM~
## 
## Extraneous Begin time
## # A tibble: 1 x 4
##   dtime               b1              b2    b3   
##   <dttm>              <chr>           <chr> <lgl>
## 1 2015-04-02 11:51:00 2015-04-02 1125 <NA>  NA   
## 
## Extraneous End time
## # A tibble: 1 x 4
##   dtime               e1              e2    e3   
##   <dttm>              <chr>           <chr> <lgl>
## 1 2015-05-17 13:51:00 2015-05-17 1336 <NA>  NA   
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kord2015 rain data
kordRain2015Test <- runFullPrecipExtraction(kord2015METAR, 
                                            pType="(?<!FZ)RA", 
                                            titleText="Chicago, IL Rainfall (hours) in 2015", 
                                            yAxisText="Hours of Rain", 
                                            endExclude=c("2015-05-17 1336"),
                                            beginExclude=c("2015-04-02 1125"),
                                            endAdd=c("2015-06-21 0551", "2015-08-15 0451"), 
                                            beginAdd=c("2015-05-11 1951", "2015-06-29 0151"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2014-12-31" "2014-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 8 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8234
## 2 TRUE          0     0     1
## 3 TRUE          0     1   174
## 4 TRUE          1     0   170
## 5 TRUE          1     1   163
## 6 TRUE          1     2    12
## 7 TRUE          2     1    16
## 8 TRUE          2     2     6
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   13.00   31.00   71.31   88.00  739.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kordRain2015Test, pType="(?<!FZ)RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8305    0
##       TRUE      0  471
## 
## Full matches between METAR observations and intervals
# Look for SN that is not preceded by BL in the 2015 Chicago, IL data
kord2015ExceptSN <- suggestBeginEndTimes(kord2015METAR, regMatch="(?<!BL)SN")
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2014-12-31" "2014-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8640
## 2 TRUE          0     1    47
## 3 TRUE          1     0    49
## 4 TRUE          1     1    34
## 5 TRUE          1     2     5
## 6 TRUE          3     1     1
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8640          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8687       8689       8775       8771       8775       8776 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 2 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2015-01-03 11:51:00 KORD 031151Z 13007KT 2 1/2SM -SN BR BKN005 BKN011 OVC049 ~
## 2 2015-02-25 22:51:00 KORD 252251Z 07008KT 1 1/2SM R10L/5500VP6000FT -SN SCT022~
## 
## Need End time
## # A tibble: 0 x 2
## # ... with 2 variables: dtime <dttm>, origMETAR <chr>
## 
## Extraneous Begin time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, b1 <chr>, b2 <chr>, b3 <chr>
## 
## Extraneous End time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>
## 
## Wrong amount of begins or ends
## # A tibble: 3 x 7
##   dtime               e1          e2    e3    b1          b2          b3        
##   <dttm>              <chr>       <chr> <lgl> <chr>       <chr>       <chr>     
## 1 2015-01-08 17:51:00 <NA>        <NA>  NA    <NA>        <NA>        <NA>      
## 2 2015-01-08 18:51:00 2015-01-08~ <NA>  NA    2015-01-08~ 2015-01-08~ 2015-01-0~
## 3 2015-01-08 19:51:00 <NA>        <NA>  NA    <NA>        <NA>        <NA>
# Attempt to use on the kord2015 snow data
kordSnow2015Test <- runFullPrecipExtraction(kord2015METAR, 
                                            pType="(?<!BL)SN", 
                                            titleText="Chicago, IL Snowfall (hours) in 2015", 
                                            yAxisText="Hours of Snow", 
                                            endExclude=c(),
                                            beginExclude=c("2015-01-08 1845"),
                                            endAdd=c(), 
                                            beginAdd=c("2015-01-03 1151", "2015-02-25 2251"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8776 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 30007KT 10SM SCT250 M10/M19 A3061 RMK AO2 SLP380 T11001194" "KORD 310151Z 30010KT 10SM FEW250 M10/M19 A3062 RMK AO2 SLP383 T11001194" "KORD 310251Z 30007KT 10SM SCT250 M11/M19 A3060 RMK AO2 SLP378 T11061189 58000" "KORD 310351Z 28010KT 10SM SCT190 BKN250 M11/M19 A3061 RMK AO2 SLP378 T11111194" ...
##  $ dtime     : POSIXct, format: "2014-12-31 00:51:00" "2014-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2014-12-31" "2014-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8640
## 2 TRUE          0     1    47
## 3 TRUE          1     0    49
## 4 TRUE          1     1    34
## 5 TRUE          1     2     5
## 6 TRUE          3     1     1
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00   19.25   51.50  184.18  197.75 2133.00 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 31
## Vector of Begins
##  [1] "2015-01-26 18:51:00 UTC" "2015-01-29 14:17:00 UTC"
##  [3] "2015-01-29 16:22:00 UTC" "2015-01-30 04:00:00 UTC"
##  [5] "2015-01-30 05:58:00 UTC" "2015-02-01 01:38:00 UTC"
##  [7] "2015-02-03 21:26:00 UTC" "2015-02-04 15:13:00 UTC"
##  [9] "2015-02-08 23:08:00 UTC" "2015-02-09 15:00:00 UTC"
## [11] "2015-02-09 17:08:00 UTC"
## 
## Vector of Ends
##  [1] "2015-01-27 07:47:00 UTC" "2015-01-29 15:05:00 UTC"
##  [3] "2015-01-29 16:44:00 UTC" "2015-01-30 05:49:00 UTC"
##  [5] "2015-01-30 06:30:00 UTC" "2015-02-02 13:11:00 UTC"
##  [7] "2015-02-04 00:22:00 UTC" "2015-02-04 19:42:00 UTC"
##  [9] "2015-02-08 23:23:00 UTC" "2015-02-09 16:20:00 UTC"
## [11] "2015-02-09 19:57:00 UTC"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kordSnow2015Test, pType="(?<!BL)SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8497    0
##       TRUE      0  279
## 
## Full matches between METAR observations and intervals
# Look for RA that is not preceded by FZ in the 2017 Chicago, IL data
kord2017ExceptRA <- suggestBeginEndTimes(kord2017METAR, regMatch="(?<!FZ)RA")
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8788 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ dtime     : POSIXct, format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2016-12-31" "2016-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8246
## 2 TRUE          0     1   173
## 3 TRUE          1     0   169
## 4 TRUE          1     1   166
## 5 TRUE          1     2    14
## 6 TRUE          2     1    16
## 7 TRUE          2     2     4
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8246          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8419       8415       8768       8770       8788       8788 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 3 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2017-05-18 04:51:00 KORD 180451Z 25022G37KT 8SM TSRA SCT034CB BKN047 OVC075 1~
## 2 2017-07-20 03:51:00 KORD 200351Z 00000KT 5SM TSRA BR FEW080CB BKN110 OVC130 2~
## 3 2017-10-14 08:51:00 KORD 140851Z 09004KT 4SM VCTS RA BR FEW032 SCT055 OVC090 ~
## 
## Need End time
## # A tibble: 0 x 2
## # ... with 2 variables: dtime <dttm>, origMETAR <chr>
## 
## Extraneous Begin time
## # A tibble: 2 x 4
##   dtime               b1              b2              b3   
##   <dttm>              <chr>           <chr>           <lgl>
## 1 2017-01-25 03:51:00 2017-01-25 0359 <NA>            NA   
## 2 2017-03-17 10:51:00 2017-03-17 0953 2017-03-17 1025 NA   
## 
## Extraneous End time
## # A tibble: 1 x 4
##   dtime               e1              e2              e3   
##   <dttm>              <chr>           <chr>           <lgl>
## 1 2017-03-01 02:51:00 2017-03-01 0201 2017-03-01 0236 NA   
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kord2017 rain data
kordRain2017Test <- runFullPrecipExtraction(kord2017METAR, 
                                            pType="(?<!FZ)RA", 
                                            titleText="Chicago, IL Rainfall (hours) in 2017", 
                                            yAxisText="Hours of Rain", 
                                            endExclude=c("2017-03-01 0201"),
                                            beginExclude=c("2017-01-25 0359", "2017-03-17 1025"),
                                            endAdd=c(), 
                                            beginAdd=c("2017-05-18 0451", "2017-07-20 0351", 
                                                       "2017-10-14 0851"
                                                       ),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8788 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ dtime     : POSIXct, format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2016-12-31" "2016-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8246
## 2 TRUE          0     1   173
## 3 TRUE          1     0   169
## 4 TRUE          1     1   166
## 5 TRUE          1     2    14
## 6 TRUE          2     1    16
## 7 TRUE          2     2     4
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00   14.00   34.50   83.78   92.75 1074.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kordRain2017Test, pType="(?<!FZ)RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8263    0
##       TRUE      0  525
## 
## Full matches between METAR observations and intervals
# Look for SN that is not preceded by BL in the 2017 Chicago, IL data
kord2017ExceptSN <- suggestBeginEndTimes(kord2017METAR, regMatch="(?<!BL)SN")
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8788 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ dtime     : POSIXct, format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2016-12-31" "2016-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8655
## 2 TRUE          0     1    52
## 3 TRUE          1     0    48
## 4 TRUE          1     1    26
## 5 TRUE          1     2     2
## 6 TRUE          2     1     5
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8655          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8707       8703       8783       8786       8788       8788 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 1 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2017-03-02 02:51:00 KORD 020251Z 31014KT 9SM -SN BKN020 OVC028 01/M03 A3000 R~
## 
## Need End time
## # A tibble: 0 x 2
## # ... with 2 variables: dtime <dttm>, origMETAR <chr>
## 
## Extraneous Begin time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, b1 <chr>, b2 <chr>, b3 <lgl>
## 
## Extraneous End time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kord2017 snow data
kordSnow2017Test <- runFullPrecipExtraction(kord2017METAR, 
                                            pType="(?<!BL)SN", 
                                            titleText="Chicago, IL Snowfall (hours) in 2017", 
                                            yAxisText="Hours of Snow", 
                                            endExclude=c(),
                                            beginExclude=c(),
                                            endAdd=c(), 
                                            beginAdd=c("2017-03-02 0251"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8788 obs. of  8 variables:
##  $ origMETAR : chr  "KORD 310051Z 18010KT 10SM SCT130 SCT220 M01/M07 A2984 RMK AO2 SLP112 T10111067" "KORD 310151Z 18015KT 10SM FEW130 SCT180 M01/M06 A2979 RMK AO2 SLP096 T10061056" "KORD 310251Z 18013KT 10SM SCT130 SCT180 00/M06 A2975 RMK AO2 SLP081 T00001056 58041" "KORD 310351Z 19017G22KT 10SM BKN140 BKN180 02/M06 A2971 RMK AO2 SLP067 T00171061" ...
##  $ dtime     : POSIXct, format: "2016-12-31 00:51:00" "2016-12-31 01:51:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2016-12-31" "2016-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 6 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8655
## 2 TRUE          0     1    52
## 3 TRUE          1     0    48
## 4 TRUE          1     1    26
## 5 TRUE          1     2     2
## 6 TRUE          2     1     5
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     8.0    18.0    47.0   129.2   136.0   890.0
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kordSnow2017Test, pType="(?<!BL)SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8592    0
##       TRUE      0  196
## 
## Full matches between METAR observations and intervals

And, the checks for precipitation consistency can then be run on the Chicago 2015 and 2017 data:

# Get the Chicago, IL 2015 liquid precipitation
kord2015PrecipTest <- extractLiquidPrecipAmounts(kordRain2015Test)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8152  158  101   85   42   39   19   19   22   10    9   16   12    9    7    8 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.31 0.33 
##    6    4    3    4    5    5    2    3    2    3    3    1    2    4    2    2 
## 0.34 0.35 0.37 0.39  0.4 0.43 0.44 0.49 0.58 0.66 0.71 0.77 0.81 0.88 
##    1    2    1    1    1    2    1    1    1    1    1    1    1    2 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8381   51   44   28   20   18   20   19   11   16    7    6   16    6    8    4 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.31 0.32 
##    6    6    4    5    4    6    2    7    6    3    6    6    2    6    1    3 
## 0.33 0.34 0.35 0.36 0.37 0.39  0.4 0.41 0.42 0.43 0.44 0.46 0.49 0.53 0.54 0.58 
##    2    2    2    2    3    2    3    2    1    1    2    1    3    1    2    2 
## 0.59 0.61 0.62 0.63 0.69 0.71 0.84 0.89 0.93 0.98 1.12 1.16 1.24 1.29  1.3 1.64 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 2.04 
##    1 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8644   10    9    9    9    1    2    4    2    5    1    1    6    3    3    1 
## 0.16 0.17 0.19 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.29 0.31 0.32 0.35 0.36 0.37 
##    4    1    2    1    2    1    1    3    3    2    3    3    1    1    2    1 
## 0.39  0.4 0.41 0.42 0.43 0.44 0.46 0.47 0.49  0.5 0.53 0.54 0.55 0.57 0.65 0.68 
##    2    1    1    2    1    1    1    1    2    1    1    1    1    1    1    2 
## 0.73  0.8 0.93 0.97 0.98 1.01 1.02  1.1 1.17 1.37 1.65 2.56 
##    1    2    1    2    2    1    1    1    1    1    1    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   2.10     10.90    0.00  10.90   0.00
## 2      1   1.85      0.00    0.00   0.00   0.00
## 3      2   1.49      0.00    0.00   0.00   0.00
## 4      3   2.47      5.91    0.00   0.00   5.91
## 5      4   1.87      0.00    0.00   0.00   0.00
## 6      5   1.68      0.00    0.00   0.00   0.00
## 7      6   1.65     11.01    0.00  11.01   0.00
## 8      7   1.48      0.00    0.00   0.00   0.00
## 9      8   1.82      0.00    0.00   0.00   0.00
## 10     9   1.55      4.87    0.00   0.00   4.87
## 11    10   2.51      0.00    0.00   0.00   0.00
## 12    11   1.40      0.00    0.00   0.00   0.00
## 13    12   1.83     10.21   38.75  10.21   0.00
## 14    13   1.62      0.00    0.00   0.00   0.00
## 15    14   2.82      0.00    0.00   0.00   0.00
## 16    15   0.82      4.27    0.00   0.00   4.27
## 17    16   1.31      0.00    0.00   0.00   0.00
## 18    17   1.73      0.00    0.00   0.00   0.00
## 19    18   2.31     10.09    0.00  10.09   0.00
## 20    19   1.35      0.00    0.00   0.00   0.00
## 21    20   1.37      0.00    0.00   0.00   0.00
## 22    21   1.64      4.38    0.00   0.00   4.38
## 23    22   2.53      0.00    0.00   0.00   0.00
## 24    23   1.84      0.00    0.00   0.00   0.00
# Check for consistency in the Chicago, IL 2015 precipitation data
checkPrecipConsistency(kord2015PrecipTest, title="Chicago, IL 2015 Precipitation by Month", yearsUse=2015)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2014    12    30    12   0       0     0    
## 2  2014    12    31    24   0       0     0    
## 3  2015     1     1    24   0       0     0    
## 4  2015     1     2    24   0.09    0.09  0.09 
## 5  2015     1     3    24   0.570   0.57  0.570
## 6  2015     1     4    24   0.04    0.04  0.04 
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2015-01    1.64   1.64   1.64
##  2 2015-02    1.2    3.4    4.8 
##  3 2015-03    1.13   1.19   1.17
##  4 2015-04    2.87   2.87   2.9 
##  5 2015-05    4.66   4.66   4.66
##  6 2015-06    7.12   7.12   7.1 
##  7 2015-07    2.85   2.85   2.85
##  8 2015-08    2.16   2.16   2.16
##  9 2015-09    4.64   4.64   4.64
## 10 2015-10    2.57   2.48   1.97
## 11 2015-11    4.67   4.67   4.67
## 12 2015-12    3.24   4.47   4.42
## p24Hour  p6Hour  p1Hour 
##   38.75   42.15   42.98

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.02 0.03 0.06 0.08 0.09 0.12 0.26 0.37 0.66 0.88 1.26 2.72 
##  344    4    7    1    2    1    1    1    1    1    2    1    1    1 
## 
## 
## Mismatch days of worse than maxDelta inches include
##    month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1      2   1    0.65   2.84   3.37      3.37      0.65  2.72
## 2      2   2    0.00   0.00   0.88      0.88      0.00  0.88
## 3      3  23    0.37   0.37   0.35      0.37      0.35  0.02
## 4      3  30    0.00   0.09   0.09      0.09      0.00  0.09
## 5      4   2    0.25   0.25   0.28      0.28      0.25  0.03
## 6      6  20    0.31   0.31   0.29      0.31      0.29  0.02
## 7     10  20    0.12   0.38   0.28      0.38      0.12  0.26
## 8     10  21    0.00   0.08   0.02      0.08      0.00  0.08
## 9     10  23    0.39   0.39   0.37      0.39      0.37  0.02
## 10    10  24    0.41   0.29   0.29      0.41      0.29  0.12
## 11    10  27    0.68   0.36   0.31      0.68      0.31  0.37
## 12    10  28    0.80   0.14   0.49      0.80      0.14  0.66
## 13    10  30    0.03   0.03   0.05      0.05      0.03  0.02
## 14    10  31    0.08   0.74   0.08      0.74      0.08  0.66
## 15    12   2    0.12   0.12   0.10      0.12      0.10  0.02
## 16    12  14    0.12   0.12   0.06      0.12      0.06  0.06
## 17    12  28    0.00   1.25   1.26      1.26      0.00  1.26
## 18     1   1    0.00   0.06   0.06      0.06      0.00  0.06
# Check for missing data and sensor anomalies
checkGapsAnomalies(kordRain2015Test, minDay="2015-01-01", maxDay="2015-12-31", loc="Chicago, IL (2015)")
## 
## Data file with new time and anomaly variable
## [1] 8776    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8728    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly      
##  Length:365         Min.   : 1.00   Min.   :18.00   Min.   : 0.000  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.000  
##  Mode  :character   Median :16.00   Median :24.00   Median : 0.000  
##                     Mean   :15.72   Mean   :23.91   Mean   : 1.411  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.: 0.000  
##                     Max.   :31.00   Max.   :24.00   Max.   :24.000  
##     missObs       
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.08767  
##  3rd Qu.:0.00000  
##  Max.   :6.00000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2015-01           1       1           6      49    31
##  2 2015-02           1       1           9      61    28
##  3 2015-03           3       3           4      43    31
##  4 2015-04           1       1           3      16    30
##  5 2015-05           1       1           5      81    31
##  6 2015-06           4      10           4       4    30
##  7 2015-07           0       0           0       0    31
##  8 2015-08           1       2           1       1    31
##  9 2015-09           1       1           4      21    30
## 10 2015-10           2      11          12      86    31
## 11 2015-11           0       0          12     142    30
## 12 2015-12           1       1           5      11    31
# Get the Chicago, IL 2017 liquid precipitation
kord2017PrecipTest <- extractLiquidPrecipAmounts(kordRain2017Test)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8203  168   68   63   44   35   25   24   20   16    4   13   11   10    9    8 
## 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29  0.3 0.31 0.32 
##    6    7    3    3    3    2    4    3    2    3    3    4    1    1    1    3 
## 0.34 0.38 0.39 0.41 0.44 0.48 0.52 0.57 0.72 0.73 0.78 0.89 0.92 0.97 
##    1    1    1    4    2    1    1    1    1    1    1    1    1    1 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8420   70   41   28   11   18   12   10   13    6    7    9    5    6    8    8 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.29 0.31 0.32 0.34 
##    5    8    2    5    5    5    2    2    7    6    2    2    2    2    2    4 
## 0.35 0.36 0.37 0.38 0.39  0.4 0.41 0.43 0.45 0.46 0.48 0.49  0.5 0.51 0.52 0.55 
##    2    1    4    1    1    3    1    1    1    1    2    5    3    2    1    2 
## 0.56  0.6 0.63 0.64 0.65 0.71 0.73 0.75 0.76  0.8 0.85 0.92 0.94 0.97 1.02 1.09 
##    1    1    1    1    1    1    1    2    1    1    2    2    1    1    1    1 
##  1.1 1.18 1.34 1.63 1.69 
##    1    1    1    1    1 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8664   10   12   11    5    3    2    3    2    2    1    2    3    3    1    2 
## 0.16 0.17 0.19  0.2 0.21 0.23 0.24 0.25 0.27 0.29 0.31 0.32 0.33 0.35 0.37 0.38 
##    2    3    2    3    2    3    2    1    1    1    2    2    1    2    1    1 
## 0.39 0.41 0.48  0.5 0.51 0.52 0.53 0.56 0.57 0.59  0.6 0.61 0.64 0.68 0.71 0.85 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 0.87 0.88 0.91 0.92 1.09 1.13 1.24  1.3 1.34 1.37  1.4 1.55 1.74 2.03 2.08  3.3 
##    1    2    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   2.49      9.64    0.00   9.64   0.00
## 2      1   2.64      0.00    0.00   0.00   0.00
## 3      2   2.15      0.00    0.00   0.00   0.00
## 4      3   1.12      5.99    0.00   0.00   5.99
## 5      4   2.19      0.00    0.00   0.00   0.00
## 6      5   1.98      0.00    0.00   0.00   0.00
## 7      6   2.38     12.55    0.00  12.55   0.00
## 8      7   1.21      0.00    0.00   0.00   0.00
## 9      8   2.05      0.00    0.00   0.00   0.00
## 10     9   2.08      5.35    0.00   0.00   5.35
## 11    10   1.32      0.00    0.00   0.00   0.00
## 12    11   2.46      0.00    0.00   0.00   0.00
## 13    12   2.02     11.15   43.63  11.15   0.00
## 14    13   2.11      0.00    0.00   0.00   0.00
## 15    14   1.34      0.00    0.00   0.00   0.00
## 16    15   0.92      4.06    0.00   0.00   4.06
## 17    16   1.71      0.00    0.00   0.00   0.00
## 18    17   2.07      0.00    0.00   0.00   0.00
## 19    18   1.62     10.33    0.00  10.33   0.00
## 20    19   0.69      0.00    0.00   0.00   0.00
## 21    20   1.10      0.00    0.00   0.00   0.00
## 22    21   0.98      2.77    0.00   0.00   2.77
## 23    22   1.51      0.00    0.00   0.00   0.00
## 24    23   2.87      0.00    0.00   0.00   0.00
# Check for consistency in the Chicago, IL 2017 precipitation data
checkPrecipConsistency(kord2017PrecipTest, title="Chicago, IL 2017 Precipitation by Month", yearsUse=2017)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2016    12    30    12    0      0      0   
## 2  2016    12    31    24    0      0      0   
## 3  2017     1     1    24    0      0      0   
## 4  2017     1     2    24    0.11   0.11   0.11
## 5  2017     1     3    24    0      0      0   
## 6  2017     1     4    24    0      0      0   
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2017-01    2.87   2.87   2.87
##  2 2017-02    1.52   1.52   1.44
##  3 2017-03    4.06   4.06   4.05
##  4 2017-04    6.9    6.94   6.94
##  5 2017-05    3.28   3.28   3.28
##  6 2017-06    3.44   3.44   3.44
##  7 2017-07    7.68   7.68   7.12
##  8 2017-08    2.51   2.51   2.51
##  9 2017-09    0.32   0.32   0.32
## 10 2017-10    8.7    8.7    8.69
## 11 2017-11    1.75   1.75   1.75
## 12 2017-12    0.6    0.6    0.6 
## p24Hour  p6Hour  p1Hour 
##   43.63   43.67   43.01

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.08 0.17 0.21 0.55 
##  361    3    1    1    1    1 
## 
## 
## Mismatch days of worse than maxDelta inches include
##   month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1     2  28    0.64   0.64   0.56      0.64      0.56  0.08
## 2     4   2    0.17   0.00   0.00      0.17      0.00  0.17
## 3     4   3    0.20   0.41   0.41      0.41      0.20  0.21
## 4     7  12    1.74   1.74   1.19      1.74      1.19  0.55
# Check for missing data and sensor anomalies
checkGapsAnomalies(kordRain2017Test, minDay="2017-01-01", maxDay="2017-12-31", loc="Chicago, IL (2017)")
## 
## Data file with new time and anomaly variable
## [1] 8788    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8740    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly      
##  Length:365         Min.   : 1.00   Min.   :20.00   Min.   : 0.000  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.000  
##  Mode  :character   Median :16.00   Median :24.00   Median : 0.000  
##                     Mean   :15.72   Mean   :23.95   Mean   : 2.649  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.: 0.000  
##                     Max.   :31.00   Max.   :24.00   Max.   :24.000  
##     missObs      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.0548  
##  3rd Qu.:0.0000  
##  Max.   :4.0000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2017-01           1       1           7      71    31
##  2 2017-02           2       3           5      27    28
##  3 2017-03           0       0           8      96    31
##  4 2017-04           0       0           4      24    30
##  5 2017-05           0       0           5      68    31
##  6 2017-06           1       2           4      48    30
##  7 2017-07           4       4           9      69    31
##  8 2017-08           2       5           3      10    31
##  9 2017-09           1       1           5      60    30
## 10 2017-10           4       4          15     216    31
## 11 2017-11           0       0           9      92    30
## 12 2017-12           0       0          17     186    31

The 2017 precipitation data in Chicago show much better alignment among the 1-hour, 6-hour, and 24-hour totals summed by day or month. There is heavy precipitation on July 12, 2017 that merits further exploration and an April 2-3, 2017 rain event that has about the same sum but differs by day. This data is suggestive of occasional one-off problems rather than a seemingly systemic issue as in parts of 2015-2016.

Example #38: Extending Suggestions to Other Locales

The suggestions can be extended to the 2016 data for Minneapolis, MN and Detroit, MI.

Example code includes:

# Look for RA that is not preceded by FZ in the 2015 Minneapolis, MN data
kmsp2016ExceptRA <- suggestBeginEndTimes(kmsp2016METAR, regMatch="(?<!FZ)RA")
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8817 obs. of  8 variables:
##  $ origMETAR : chr  "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 8 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8301
## 2 TRUE          0     0     1
## 3 TRUE          0     1   173
## 4 TRUE          1     0   177
## 5 TRUE          1     1   137
## 6 TRUE          1     2    11
## 7 TRUE          2     1    10
## 8 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8302          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8475       8479       8800       8799       8817       8817 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 1 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-09-06 07:53:00 KMSP 060753Z 22007G17KT 6SM -TSRA FEW023 BKN049CB OVC095 ~
## 
## Need End time
## # A tibble: 2 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-09-06 08:53:00 KMSP 060853Z 24003KT 8SM TS BKN049CB BKN060 OVC080 25/22 ~
## 2 2016-11-22 13:53:00 KMSP 221353Z 13018KT 10SM -PL OVC055 01/M06 A3015 RMK AO2~
## 
## Extraneous Begin time
## # A tibble: 4 x 4
##   dtime               b1              b2              b3   
##   <dttm>              <chr>           <chr>           <lgl>
## 1 2016-05-29 19:53:00 2016-05-29 1931 <NA>            NA   
## 2 2016-06-20 04:53:00 2016-06-20 0411 <NA>            NA   
## 3 2016-09-06 08:53:00 2016-09-06 0744 2016-09-06 0817 NA   
## 4 2016-11-29 07:53:00 2016-11-29 0718 <NA>            NA   
## 
## Extraneous End time
## # A tibble: 2 x 4
##   dtime               e1              e2    e3   
##   <dttm>              <chr>           <chr> <lgl>
## 1 2016-04-05 15:53:00 2016-04-05 1522 <NA>  NA   
## 2 2016-10-18 04:53:00 2016-10-18 0435 <NA>  NA   
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kmsp2016 rain data
kmspRain2016Test <- runFullPrecipExtraction(kmsp2016METAR, 
                                            pType="(?<!FZ)RA", 
                                            titleText="Minneapolis, MN Rainfall (hours) in 2016", 
                                            yAxisText="Hours of Rain", 
                                            endExclude=c("2016-04-05 1522", "2016-10-18 0435"),
                                            beginExclude=c("2016-05-29 1931", "2016-06-20 0411", 
                                                           "2016-09-06 0744", "2016-09-16 0817", 
                                                           "2016-11-29 0718"
                                                           ),
                                            endAdd=c("2016-09-06 0853", "2016-11-22 1352"), 
                                            beginAdd=c("2016-09-06 0753"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8817 obs. of  8 variables:
##  $ origMETAR : chr  "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 8 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8301
## 2 TRUE          0     0     1
## 3 TRUE          0     1   173
## 4 TRUE          1     0   177
## 5 TRUE          1     1   137
## 6 TRUE          1     2    11
## 7 TRUE          2     1    10
## 8 TRUE          2     2     7
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     2.0    14.0    35.5    72.9    86.0  1033.0
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kmspRain2016Test, pType="(?<!FZ)RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8390    0
##       TRUE      0  427
## 
## Full matches between METAR observations and intervals
# Look for SN that is not preceded by BL in the 2016 Minneapolis, MN data
kmsp2016ExceptSN <- suggestBeginEndTimes(kmsp2016METAR, regMatch="(?<!BL)SN")
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8817 obs. of  8 variables:
##  $ origMETAR : chr  "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  "SNB51" NA NA "SNE40" ...
##  $ isPrecip  : logi  TRUE FALSE FALSE TRUE FALSE FALSE ...
##  $ nBegin    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8572
## 2 TRUE          0     1    97
## 3 TRUE          1     0    91
## 4 TRUE          1     1    48
## 5 TRUE          1     2     1
## 6 TRUE          2     1     5
## 7 TRUE          2     2     3
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8572          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8669       8663       8809       8813       8817       8817 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          0          1 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 3 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-01-12 00:53:00 KMSP 120053Z 26005KT 6SM -SN BKN031 OVC040 M14/M18 A2974 ~
## 2 2016-12-07 15:53:00 KMSP 071553Z 26019G26KT 7SM -SN SCT021 OVC035 M08/M12 A30~
## 3 2016-12-08 22:53:00 KMSP 082253Z 32012KT 9SM -SN OVC030 M06/M12 A3046 RMK AO2~
## 
## Need End time
## # A tibble: 1 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-12-07 14:53:00 KMSP 071453Z 26015G20KT 3SM -SN FEW019 SCT026 OVC032 M08/~
## 
## Extraneous Begin time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, b1 <chr>, b2 <chr>, b3 <lgl>
## 
## Extraneous End time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kmsp2016 snow data
kmspSnow2016Test <- runFullPrecipExtraction(kmsp2016METAR, 
                                            pType="(?<!BL)SN", 
                                            titleText="Minneapolis, MN Snowfall (hours) in 2016", 
                                            yAxisText="Hours of Snow", 
                                            endExclude=c(),
                                            beginExclude=c(),
                                            endAdd=c("2016-12-07 1453"), 
                                            beginAdd=c("2016-01-12 0053", "2016-12-07 1553", 
                                                       "2016-12-08 2253"
                                                       ),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8817 obs. of  8 variables:
##  $ origMETAR : chr  "KMSP 310053Z 27008KT 4SM -SN OVC018 M05/M08 A3019 RMK AO2 SNB51 SLP237 P0000 T10501083" "KMSP 310153Z 23011KT 5SM -SN BR FEW012 BKN017 OVC070 M06/M08 A3019 RMK AO2 SLP237 P0000 T10561078" "KMSP 310253Z 25007KT 9SM -SN OVC019 M06/M09 A3019 RMK AO2 SLP239 P0000 60000 T10611089 51004" "KMSP 310353Z 27008KT 10SM SCT016 BKN041 M07/M10 A3019 RMK AO2 SNE40 SLP238 P0000 T10721100" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  "SNB51" NA NA "SNE40" ...
##  $ isPrecip  : logi  TRUE FALSE FALSE TRUE FALSE FALSE ...
##  $ nBegin    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8572
## 2 TRUE          0     1    97
## 3 TRUE          1     0    91
## 4 TRUE          1     1    48
## 5 TRUE          1     2     1
## 6 TRUE          2     1     5
## 7 TRUE          2     2     3
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     5.0    23.0    73.0   145.1   159.5  1783.0 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 133
## Vector of Begins
##  [1] "2016-12-08 17:48:00 UTC" "2016-12-08 22:53:00 UTC"
##  [3] "2016-12-09 14:00:00 UTC" "2016-12-10 01:00:00 UTC"
##  [5] "2016-12-10 05:01:00 UTC" "2016-12-10 20:06:00 UTC"
##  [7] "2016-12-12 12:52:00 UTC" "2016-12-13 00:12:00 UTC"
##  [9] "2016-12-16 04:22:00 UTC" "2016-12-16 14:02:00 UTC"
## [11] "2016-12-16 19:19:00 UTC"
## 
## Vector of Ends
##  [1] "2016-12-08 21:53:00 UTC" "2016-12-09 03:35:00 UTC"
##  [3] "2016-12-09 15:56:00 UTC" "2016-12-10 03:18:00 UTC"
##  [5] "2016-12-10 05:15:00 UTC" "2016-12-12 01:49:00 UTC"
##  [7] "2016-12-12 13:27:00 UTC" "2016-12-13 00:25:00 UTC"
##  [9] "2016-12-16 08:39:00 UTC" "2016-12-16 16:23:00 UTC"
## [11] "2016-12-17 08:02:00 UTC"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kmspSnow2016Test, pType="(?<!BL)SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8438    0
##       TRUE      0  379
## 
## Full matches between METAR observations and intervals
# Look for RA that is not preceded by FZ in the 2016 Detroit, MI data
kdtw2016ExceptRA <- suggestBeginEndTimes(kdtw2016METAR, regMatch="(?<!FZ)RA")
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8262
## 2 TRUE          0     1   186
## 3 TRUE          1     0   185
## 4 TRUE          1     1   157
## 5 TRUE          1     2    11
## 6 TRUE          2     1    13
## 7 TRUE          2     2     4
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8262          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8448       8447       8801       8803       8818       8818 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 2 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-02-24 11:53:00 KDTW 241153Z 04014G19KT 5SM -RAPL SCT008 OVC012 01/M02 A2~
## 2 2016-04-07 23:53:00 KDTW 072353Z 30007KT 7SM -SNRA BKN022 OVC041 01/M01 A2962~
## 
## Need End time
## # A tibble: 4 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-04-01 00:53:00 KDTW 010053Z 22012KT 10SM FEW030 BKN060 BKN090 16/11 A294~
## 2 2016-04-07 22:53:00 KDTW 072253Z 30013G19KT 1 1/2SM R03R/6000VP6000FT -SN BKN~
## 3 2016-08-15 00:53:00 KDTW 150053Z 00000KT 9SM FEW045 OVC095 22/19 A3007 RMK AO~
## 4 2016-08-16 14:53:00 KDTW 161453Z 22019G22 10SM BKN014 OVC030 24/22 A2993 RMK ~
## 
## Extraneous Begin time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, b1 <chr>, b2 <chr>, b3 <lgl>
## 
## Extraneous End time
## # A tibble: 1 x 4
##   dtime               e1              e2    e3   
##   <dttm>              <chr>           <chr> <lgl>
## 1 2016-07-14 00:53:00 2016-07-14 0030 <NA>  NA   
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <lgl>, b1 <chr>,
## #   b2 <chr>, b3 <lgl>
# Attempt to use on the kdtw2016 rain data
kdtwRain2016Test <- runFullPrecipExtraction(kdtw2016METAR, 
                                            pType="(?<!FZ)RA", 
                                            titleText="Detroit, MI Rainfall (hours) in 2016", 
                                            yAxisText="Hours of Rain", 
                                            endExclude=c("2016-07-14 0030"),
                                            beginExclude=c(),
                                            endAdd=c("2016-04-01 0053", "2016-04-07 2253", 
                                                     "2016-08-15 0053", "2016-08-16 1453"
                                                     ), 
                                            beginAdd=c("2016-02-24 1153", "2016-04-07 2353"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!FZ)RA[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 7 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8262
## 2 TRUE          0     1   186
## 3 TRUE          1     0   185
## 4 TRUE          1     1   157
## 5 TRUE          1     2    11
## 6 TRUE          2     1    13
## 7 TRUE          2     2     4
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   15.00   36.00   86.42   89.00  932.00
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kdtwRain2016Test, pType="(?<!FZ)RA")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8258    0
##       TRUE      0  560
## 
## Full matches between METAR observations and intervals
# Look for SN that is not preceded by BL in the 2016 Detroit, MI data
kdtw2016ExceptSN <- suggestBeginEndTimes(kdtw2016METAR, regMatch="(?<!BL)SN")
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 8 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8540
## 2 TRUE          0     1   105
## 3 TRUE          1     0   103
## 4 TRUE          1     1    57
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 7 TRUE          2     2     2
## 8 TRUE          3     3     1
## 
## Are there any problems with duplicated keys? FALSE 
##      dtime  origMETAR  curPrecip  lagPrecip precipData  chgPrecip    dateUTC 
##          0          0          0          1       8540          0          0 
##    hourUTC         b1         e1         b2         e2         b3         e3 
##          0       8645       8643       8809       8811       8817       8817 
##     begins       ends       etob       btoe  needBegin    needEnd  overBegin 
##          0          0          0          0          0          1          0 
##    overEnd 
##          0 
## 
## Need Begin time
## # A tibble: 1 x 2
##   dtime               origMETAR                                                 
##   <dttm>              <chr>                                                     
## 1 2016-02-09 10:53:00 KDTW 091053Z 23007KT 2SM -SN BR SCT007 OVC015 00/M02 A295~
## 
## Need End time
## # A tibble: 0 x 2
## # ... with 2 variables: dtime <dttm>, origMETAR <chr>
## 
## Extraneous Begin time
## # A tibble: 1 x 4
##   dtime               b1              b2    b3   
##   <dttm>              <chr>           <chr> <chr>
## 1 2016-02-24 11:53:00 2016-02-24 1112 <NA>  <NA> 
## 
## Extraneous End time
## # A tibble: 0 x 4
## # ... with 4 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <chr>
## 
## Wrong amount of begins or ends
## # A tibble: 0 x 7
## # ... with 7 variables: dtime <dttm>, e1 <chr>, e2 <chr>, e3 <chr>, b1 <chr>,
## #   b2 <chr>, b3 <chr>
# Attempt to use on the kdtw2016 snow data
kdtwSnow2016Test <- runFullPrecipExtraction(kdtw2016METAR, 
                                            pType="(?<!BL)SN", 
                                            titleText="Detroit, MI Snowfall (hours) in 2016", 
                                            yAxisText="Hours of Snow", 
                                            endExclude=c(),
                                            beginExclude=c("2016-02-24 1112"),
                                            endAdd=c(), 
                                            beginAdd=c("2016-02-09 1053"),
                                            maxProb=1440, 
                                            sState=FALSE, 
                                            makePlots=TRUE
                                            )
## 
## Regex search code is: ((?<!BL)SN[B|E]\d+[0-9BE]*) 
## 
## Classes 'tbl_df', 'tbl' and 'data.frame':    8818 obs. of  8 variables:
##  $ origMETAR : chr  "KDTW 310053Z 23007KT 10SM BKN025 OVC050 02/M03 A3017 RMK AO2 SLP223 T00221033" "KDTW 310153Z 22009KT 10SM OVC021 02/M03 A3019 RMK AO2 SLP229 T00221028" "KDTW 310253Z 24009KT 10SM OVC021 02/M03 A3018 RMK AO2 SLP224 T00171028 50004" "KDTW 310353Z 24010KT 10SM OVC025 02/M04 A3018 RMK AO2 SLP225 T00171039" ...
##  $ dtime     : POSIXct, format: "2015-12-31 00:53:00" "2015-12-31 01:53:00" ...
##  $ precipData: chr  NA NA NA NA ...
##  $ isPrecip  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ nBegin    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ nEnd      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ dateUTC   : Date, format: "2015-12-31" "2015-12-31" ...
##  $ hourUTC   : int  0 1 2 3 4 5 6 7 8 9 ...
## 
## # A tibble: 8 x 4
##   isPrecip nBegin  nEnd     n
##   <lgl>     <dbl> <dbl> <int>
## 1 FALSE         0     0  8540
## 2 TRUE          0     1   105
## 3 TRUE          1     0   103
## 4 TRUE          1     1    57
## 5 TRUE          1     2     4
## 6 TRUE          2     1     6
## 7 TRUE          2     2     2
## 8 TRUE          3     3     1
## 
## Are there any problems with duplicated keys? FALSE 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0    25.0    60.0   155.8   195.5  1575.0 
## 
## Potential problem Detected - very long.  Data to help investigate
## 
## Positions with problems are: 160
## Vector of Begins
##  [1] "2016-12-10 02:54:00 UTC" "2016-12-10 03:53:00 UTC"
##  [3] "2016-12-10 11:10:00 UTC" "2016-12-10 14:23:00 UTC"
##  [5] "2016-12-10 20:39:00 UTC" "2016-12-11 06:10:00 UTC"
##  [7] "2016-12-12 11:50:00 UTC" "2016-12-12 11:56:00 UTC"
##  [9] "2016-12-12 15:21:00 UTC" "2016-12-13 15:49:00 UTC"
## [11] "2016-12-14 20:36:00 UTC"
## 
## Vector of Ends
##  [1] "2016-12-10 03:15:00 UTC" "2016-12-10 10:10:00 UTC"
##  [3] "2016-12-10 11:35:00 UTC" "2016-12-10 15:14:00 UTC"
##  [5] "2016-12-10 20:59:00 UTC" "2016-12-12 08:25:00 UTC"
##  [7] "2016-12-12 11:52:00 UTC" "2016-12-12 12:08:00 UTC"
##  [9] "2016-12-12 15:30:00 UTC" "2016-12-13 16:32:00 UTC"
## [11] "2016-12-14 22:25:00 UTC"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Intervals match
tmp <- intervalConsistency(kdtwSnow2016Test, pType="(?<!BL)SN")
##            intMETAR
## precipMETAR FALSE TRUE
##       FALSE  8344    0
##       TRUE      0  474
## 
## Full matches between METAR observations and intervals

And, the checks for precipitation consistency can then be run on the Minneapolis and Detroit 2016 data:

# Get the Minneapolis, MN 2016 liquid precipitation
kmsp2016PrecipTest <- extractLiquidPrecipAmounts(kmspRain2016Test)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8278  176   86   48   31   28   27   18   17    8    6    9    6    5    3    8 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.28 0.29  0.3 0.32 0.36 
##    5    4    1    1    2    2    3    2    1    5    6    3    2    1    1    1 
## 0.37 0.38 0.39 0.42 0.45 0.46 0.47  0.5 0.52  0.6 0.64 0.74 0.76 0.79 0.83 0.84 
##    1    1    1    1    2    3    3    1    1    1    1    1    1    2    1    1 
## 0.98 
##    1 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8465   73   40   21   21   16   16   17    9    5    8    3    3    5    9    4 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.24 0.25 0.26 0.27 0.29 0.31 0.32 0.33 0.35 
##    4    7    7    1    5    4    2    2    3    3    4    5    2    1    2    2 
## 0.36 0.37 0.38 0.39  0.4 0.41 0.43 0.44 0.45 0.47 0.48 0.51 0.52 0.55 0.56  0.6 
##    1    1    1    2    1    1    1    1    1    6    1    1    1    2    2    1 
## 0.62 0.63 0.64 0.71 0.76 0.85 0.87 0.93 0.99 1.01 1.12 1.13 1.21 1.22  1.3 1.34 
##    1    1    3    2    2    1    1    2    1    1    2    1    1    1    1    1 
## 1.51 1.86 
##    1    1 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08  0.1 0.11 0.12 0.15 0.16 0.17 0.18 
## 8693   20    9    7    7    4    4    4    1    3    1    1    2    1    1    1 
## 0.19  0.2 0.21 0.24 0.25 0.29  0.3 0.31 0.32 0.33 0.36 0.37 0.38 0.39 0.44 0.45 
##    1    4    3    2    2    1    1    2    2    3    1    1    2    1    1    1 
## 0.46 0.47 0.48 0.49 0.52 0.53  0.6 0.63 0.64 0.66 0.73 0.78  0.8 0.85 0.86 0.88 
##    1    1    1    1    1    1    1    1    2    1    1    1    1    1    1    1 
##  0.9 0.93 0.97 1.05 1.08 1.13 1.21 1.49 1.61 1.96 2.17 2.63 
##    1    2    1    1    1    1    1    1    1    1    1    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   2.34      9.63     0.0   9.63   0.00
## 2      1   1.96      0.00     0.0   0.00   0.00
## 3      2   2.38      0.00     0.0   0.00   0.00
## 4      3   2.48      6.82     0.0   0.00   6.82
## 5      4   1.59      0.00     0.0   0.00   0.00
## 6      5   2.19      0.00     0.0   0.00   0.00
## 7      6   2.41     13.31     0.0  13.31   0.00
## 8      7   1.15      0.00     0.0   0.00   0.00
## 9      8   1.87      0.00     0.0   0.00   0.00
## 10     9   1.78      4.80     0.0   0.00   4.80
## 11    10   1.71      0.00     0.0   0.00   0.00
## 12    11   1.21      0.00     0.0   0.00   0.00
## 13    12   1.11      8.95    40.3   8.95   0.00
## 14    13   1.02      0.00     0.0   0.00   0.00
## 15    14   1.98      0.00     0.0   0.00   0.00
## 16    15   0.80      3.80     0.0   0.00   3.80
## 17    16   1.34      0.00     0.0   0.00   0.00
## 18    17   1.05      0.00     0.0   0.00   0.00
## 19    18   2.20      8.42     0.0   8.42   0.00
## 20    19   0.89      0.00     0.0   0.00   0.00
## 21    20   0.49      0.00     0.0   0.00   0.00
## 22    21   1.08      2.46     0.0   0.00   2.46
## 23    22   2.25      0.00     0.0   0.00   0.00
## 24    23   2.30      0.00     0.0   0.00   0.00
# Check for consistency in the Minneapolis, MN 2016 precipitation data
checkPrecipConsistency(kmsp2016PrecipTest, title="Minneapolis, MN 2016 Precipitation by Month", yearsUse=2016)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2015    12    30    12    0      0      0   
## 2  2015    12    31    24    0.01   0.01   0.01
## 3  2016     1     1    24    0      0      0   
## 4  2016     1     2    24    0      0      0   
## 5  2016     1     3    24    0      0      0   
## 6  2016     1     4    24    0      0      0   
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2016-01    0.31   0.31   0.31
##  2 2016-02    1.06   1.07   0.71
##  3 2016-03    2.28   2.28   2.08
##  4 2016-04    2.82   2.82   2.82
##  5 2016-05    2.42   2.42   2.42
##  6 2016-06    4.49   4.49   4.49
##  7 2016-07    5.09   5.09   5.09
##  8 2016-08    7.82   7.82   7.82
##  9 2016-09    5.47   5.47   5.47
## 10 2016-10    3.41   3.41   3.24
## 11 2016-11    2.98   2.98   2.98
## 12 2016-12    2.14   2.14   2.14
## p24Hour  p6Hour  p1Hour 
##   40.29   40.30   39.57

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.02 0.05 0.12  0.2 0.33 
##  363    1    1    1    1    1    1 
## 
## 
## Mismatch days of worse than maxDelta inches include
##   month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1     2   2    0.52   0.52   0.19      0.52      0.19  0.33
## 2     3  23    0.31   0.31   0.11      0.31      0.11  0.20
## 3    10   4    0.73   0.73   0.61      0.73      0.61  0.12
## 4    10  17    0.66   0.66   0.61      0.66      0.61  0.05
# Check for missing data and sensor anomalies
checkGapsAnomalies(kmspRain2016Test, minDay="2016-01-01", maxDay="2016-12-31", loc="Minneapolis, MN (2016)")
## 
## Data file with new time and anomaly variable
## [1] 8817    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8769    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly      
##  Length:366         Min.   : 1.00   Min.   :20.00   Min.   : 0.000  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.000  
##  Mode  :character   Median :16.00   Median :24.00   Median : 0.000  
##                     Mean   :15.76   Mean   :23.96   Mean   : 1.623  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.: 0.000  
##                     Max.   :31.00   Max.   :24.00   Max.   :24.000  
##     missObs       
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.04098  
##  3rd Qu.:0.00000  
##  Max.   :4.00000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2016-01           0       0           1      15    31
##  2 2016-02           0       0           1      10    29
##  3 2016-03           0       0          20     268    31
##  4 2016-04           0       0           1       2    30
##  5 2016-05           2       2           3      50    31
##  6 2016-06           1       2           2       5    30
##  7 2016-07           1       4           0       0    31
##  8 2016-08           1       1           5      59    31
##  9 2016-09           0       0           1       2    30
## 10 2016-10           2       2           5      60    31
## 11 2016-11           1       1           4      21    30
## 12 2016-12           2       3           8     102    31
# Get the Detroit, MI 2016 liquid precipitation
kdtw2016PrecipTest <- extractLiquidPrecipAmounts(kdtwRain2016Test)
## 
## Hourly totals for liquid precipitation equivalents:
## 
## pAmounts1Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.14 0.15 0.16 
## 8246  181   95   55   43   32   27   25   17   13    7   13    6    5    4    4 
## 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.27 0.28  0.3 0.31 0.35 0.36 0.38 0.41 0.42 
##    4    1    5    1    5    6    3    2    2    1    1    1    1    1    1    1 
## 0.43 0.56  0.6 0.67 0.73 1.02 1.08 
##    1    1    1    2    2    1    1 
## 
## 3/6-hourly totals for liquid precipitation equivalents:
## 
## pAmounts6Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09  0.1 0.11 0.12 0.13 0.14 0.15 
## 8446   66   54   25   25   20    9   14   16    4    5    9    7    7    3    5 
## 0.16 0.17 0.18 0.19  0.2 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28  0.3 0.31 0.32 
##    4    9   12    8    4    6    2    4    1    2    2    3    2    1    2    1 
## 0.33 0.34 0.36 0.37 0.39 0.41 0.42 0.43 0.44 0.45 0.46 0.48 0.51 0.52 0.55 0.56 
##    1    4    1    1    1    2    1    1    1    1    2    2    1    1    1    1 
## 0.59 0.66 0.68  0.7 0.71 0.75 0.78 0.81 0.84 0.86 0.87 0.99 1.01 1.29  1.7 1.74 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    2    1    1 
## 1.78 
##    1 
## 
## 3/24-hourly totals for liquid precipitation equivalents:
## 
## pAmounts24Hour
##    0 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.11 0.12 0.14 0.15 0.16 0.17 
## 8692   16   14    7    9    5    4    2    1    2    4    4    1    1    1    3 
## 0.18 0.19 0.21 0.22 0.23 0.24 0.25 0.26 0.28 0.29 0.33 0.34 0.35 0.38 0.41 0.45 
##    3    4    4    1    2    1    1    1    3    1    1    1    1    1    1    2 
##  0.5 0.51 0.52 0.56 0.61 0.64 0.65 0.66 0.76 0.83 0.84 0.97 0.99    1 1.02 1.03 
##    1    1    1    1    1    1    2    1    1    1    2    1    1    1    1    1 
## 1.16 1.23 1.25 1.29 2.15 2.31 
##    1    1    1    1    1    1 
##    zTime p1Hour p3or6Hour p24Hour p6Hour p3Hour
## 1      0   1.95     11.12    0.00  11.12   0.00
## 2      1   1.61      0.00    0.00   0.00   0.00
## 3      2   1.45      0.00    0.00   0.00   0.00
## 4      3   1.76      4.72    0.00   0.00   4.72
## 5      4   1.06      0.00    0.00   0.00   0.00
## 6      5   1.43      0.00    0.00   0.00   0.00
## 7      6   1.79      9.00    0.00   9.00   0.00
## 8      7   2.13      0.00    0.00   0.00   0.00
## 9      8   1.77      0.00    0.00   0.00   0.00
## 10     9   2.08      5.98    0.00   0.00   5.98
## 11    10   0.95      0.00    0.00   0.00   0.00
## 12    11   0.85      0.00    0.00   0.00   0.00
## 13    12   1.01      8.79   34.16   8.79   0.00
## 14    13   0.64      0.00    0.00   0.00   0.00
## 15    14   0.64      0.00    0.00   0.00   0.00
## 16    15   0.65      1.93    0.00   0.00   1.93
## 17    16   0.68      0.00    0.00   0.00   0.00
## 18    17   1.16      0.00    0.00   0.00   0.00
## 19    18   1.83      5.60    0.00   5.60   0.00
## 20    19   0.99      0.00    0.00   0.00   0.00
## 21    20   2.28      0.00    0.00   0.00   0.00
## 22    21   2.23      5.50    0.00   0.00   5.50
## 23    22   1.90      0.00    0.00   0.00   0.00
## 24    23   1.72      0.00    0.00   0.00   0.00
# Check for consistency in the Detroit, MI 2016 precipitation data
checkPrecipConsistency(kdtw2016PrecipTest, title="Detroit, MI 2016 Precipitation by Month", yearsUse=2016)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2015    12    30    12       0      0      0
## 2  2015    12    31    24       0      0      0
## 3  2016     1     1    24       0      0      0
## 4  2016     1     2    24       0      0      0
## 5  2016     1     3    24       0      0      0
## 6  2016     1     4    24       0      0      0
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2016-01    1.32   1.32   1.32
##  2 2016-02    1.94   1.94   1.94
##  3 2016-03    4.84   4.78   4.78
##  4 2016-04    2.35   2.35   2.35
##  5 2016-05    2.16   2.16   2.16
##  6 2016-06    1.44   1.64   1.64
##  7 2016-07    1.23   1.23   1.23
##  8 2016-08    5.36   5.57   5.62
##  9 2016-09    6.55   6.55   6.55
## 10 2016-10    2.71   2.71   2.71
## 11 2016-11    2.1    2.1    2.1 
## 12 2016-12    2.16   2.16   2.16
## p24Hour  p6Hour  p1Hour 
##   34.16   34.51   34.56

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.06  0.2 0.21 
##  363    2    1    1    2 
## 
## 
## Mismatch days of worse than maxDelta inches include
##   month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1     3  31    0.33   0.27   0.27      0.33      0.27  0.06
## 2     6  15    0.00   0.20   0.20      0.20      0.00  0.20
## 3     8  12    0.00   0.00   0.21      0.21      0.00  0.21
## 4     8  14    0.21   0.42   0.26      0.42      0.21  0.21
# Check for missing data and sensor anomalies
checkGapsAnomalies(kdtwRain2016Test, minDay="2016-01-01", maxDay="2016-12-31", loc="Chicago, IL (2016)")
## 
## Data file with new time and anomaly variable
## [1] 8818    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
## 
## Data file filtered to include only desired times
## [1] 8770    9
## [1] "metar"     "dtime"     "dtUse"     "year"      "month"     "day"      
## [7] "ym"        "isAnomaly" "n"        
##       ym                 day              n            anomaly     
##  Length:366         Min.   : 1.00   Min.   :19.00   Min.   : 0.00  
##  Class :character   1st Qu.: 8.00   1st Qu.:24.00   1st Qu.: 0.00  
##  Mode  :character   Median :16.00   Median :24.00   Median : 1.00  
##                     Mean   :15.76   Mean   :23.96   Mean   : 6.27  
##                     3rd Qu.:23.00   3rd Qu.:24.00   3rd Qu.:10.00  
##                     Max.   :31.00   Max.   :24.00   Max.   :24.00  
##     missObs       
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.03825  
##  3rd Qu.:0.00000  
##  Max.   :5.00000

## # A tibble: 12 x 6
##    ym      missObsDays missObs anomalyDays anomaly nDays
##    <chr>         <int>   <dbl>       <int>   <int> <int>
##  1 2016-01           0       0          13     120    31
##  2 2016-02           1       1          14     120    29
##  3 2016-03           1       1          28     338    31
##  4 2016-04           0       0          19     256    30
##  5 2016-05           0       0          18     186    31
##  6 2016-06           1       2          18     241    30
##  7 2016-07           1       5          14     150    31
##  8 2016-08           2       2          22     259    31
##  9 2016-09           0       0          20     234    30
## 10 2016-10           0       0          15     117    31
## 11 2016-11           1       1          17     126    30
## 12 2016-12           1       2          11     148    31

There are just a few one-off mismatches among 1-hour, 6-hour, and 24-hour precipitation sums by day/month in the 2016 data for Minneapolis, MN and Detroit, MI. So the more systemic issues observed in the 2016 Chicago, IL data appear to be specific to Chicago 2016 rather than a general issue with cold-weather cities or reporting issues associated with 2016.

Example #39: Consistency Checks for Precipitation

Automating checks for precipitation may help find the areas where the 1-hour totals, 3/6-hour totals, and 24-hour totals diverge. These are areas for further exploration that can help better assess the monthly liquid precipitation totals.

Example code includes:

# Check for consistency in the Chicago, IL 2015 precipitation data
checkPrecipConsistency(kord2015PrecipTest, title="Chicago, IL 2015 Precipitation by Month", yearsUse=2015)
## # A tibble: 6 x 7
##    year month   day     n p24Hour p6Hour p1Hour
##   <dbl> <dbl> <int> <dbl>   <dbl>  <dbl>  <dbl>
## 1  2014    12    30    12   0       0     0    
## 2  2014    12    31    24   0       0     0    
## 3  2015     1     1    24   0       0     0    
## 4  2015     1     2    24   0.09    0.09  0.09 
## 5  2015     1     3    24   0.570   0.57  0.570
## 6  2015     1     4    24   0.04    0.04  0.04 
## # A tibble: 12 x 4
##    ym      p24Hour p6Hour p1Hour
##    <chr>     <dbl>  <dbl>  <dbl>
##  1 2015-01    1.64   1.64   1.64
##  2 2015-02    1.2    3.4    4.8 
##  3 2015-03    1.13   1.19   1.17
##  4 2015-04    2.87   2.87   2.9 
##  5 2015-05    4.66   4.66   4.66
##  6 2015-06    7.12   7.12   7.1 
##  7 2015-07    2.85   2.85   2.85
##  8 2015-08    2.16   2.16   2.16
##  9 2015-09    4.64   4.64   4.64
## 10 2015-10    2.57   2.48   1.97
## 11 2015-11    4.67   4.67   4.67
## 12 2015-12    3.24   4.47   4.42
## p24Hour  p6Hour  p1Hour 
##   38.75   42.15   42.98

## 
## Mismatch precipitation amounts by day are:
## .
##    0 0.01 0.02 0.03 0.06 0.08 0.09 0.12 0.26 0.37 0.66 0.88 1.26 2.72 
##  344    4    7    1    2    1    1    1    1    1    2    1    1    1 
## 
## 
## Mismatch days of worse than maxDelta inches include
##    month day p24Hour p6Hour p1Hour maxPrecip minPrecip delta
## 1      2   1    0.65   2.84   3.37      3.37      0.65  2.72
## 2      2   2    0.00   0.00   0.88      0.88      0.00  0.88
## 3      3  23    0.37   0.37   0.35      0.37      0.35  0.02
## 4      3  30    0.00   0.09   0.09      0.09      0.00  0.09
## 5      4   2    0.25   0.25   0.28      0.28      0.25  0.03
## 6      6  20    0.31   0.31   0.29      0.31      0.29  0.02
## 7     10  20    0.12   0.38   0.28      0.38      0.12  0.26
## 8     10  21    0.00   0.08   0.02      0.08      0.00  0.08
## 9     10  23    0.39   0.39   0.37      0.39      0.37  0.02
## 10    10  24    0.41   0.29   0.29      0.41      0.29  0.12
## 11    10  27    0.68   0.36   0.31      0.68      0.31  0.37
## 12    10  28    0.80   0.14   0.49      0.80      0.14  0.66
## 13    10  30    0.03   0.03   0.05      0.05      0.03  0.02
## 14    10  31    0.08   0.74   0.08      0.74      0.08  0.66
## 15    12   2    0.12   0.12   0.10      0.12      0.10  0.02
## 16    12  14    0.12   0.12   0.06      0.12      0.06  0.06
## 17    12  28    0.00   1.25   1.26      1.26      0.00  1.26
## 18     1   1    0.00   0.06   0.06      0.06      0.00  0.06
# Check for consistency by time interval in the Chicago, IL 2015 precipitation data

# Code copied from checkPrecipConsistency
hrStart <- 12
dfPrecipHourly <- kord2015PrecipTest %>% 
    mutate(dtUse=dtime-lubridate::hours(hrStart), 
           year=lubridate::year(dtUse), 
           month=lubridate::month(dtUse), 
           day=lubridate::day(dtUse), 
           tod=ifelse(((zTime-hrStart) %% 24)==0, 24, (zTime-hrStart) %% 24),
           h3Block=((tod-1) %/% 3) + 1, 
           h6Block=((tod-1) %/% 6) + 1,
           h24Block=((tod-1) %/% 24) + 1, 
           n=1
           ) %>%
    group_by(year, month, day, h3Block) %>%
    mutate(h3SumHourly=sum(p1Hour)) %>%
    ungroup() %>%
    group_by(year, month, day, h6Block) %>%
    mutate(h6SumHourly=sum(p1Hour)) %>%
    ungroup() %>%
    group_by(year, month, day, h24Block) %>%
    mutate(h24SumHourly=sum(p1Hour), h24Sum6Hourly=sum(p6Hour)) %>%
    ungroup()
    
# Three-hourly sum does not equal sum of hourly sums
threeVone <- dfPrecipHourly %>%
    group_by(year, month, day, h3Block) %>%
    summarize(n=sum(n), p1Hour=sum(p1Hour), p3Hour=sum(p3Hour)) %>%
    filter((h3Block %% 2) != 0, abs(p1Hour-p3Hour) > 0.025)

# Six-hourly sum does not equal sum of hourly sums
sixVone <- dfPrecipHourly %>%
    group_by(year, month, day, h6Block) %>%
    summarize(n=sum(n), p1Hour=sum(p1Hour), p6Hour=sum(p6Hour)) %>%
    filter(abs(p1Hour-p6Hour) > 0.025)

# 24-hourly sum does not equal sum of hourly sums
twentyfourVone <- dfPrecipHourly %>%
    group_by(year, month, day, h24Block) %>%
    summarize(n=sum(n), p1Hour=sum(p1Hour), p24Hour=sum(p24Hour)) %>%
    filter(abs(p24Hour-p1Hour) > 0.025)

# 24-hourly sum does not equal sum of six-hourly sums
twentyfourVsix <- dfPrecipHourly %>%
    group_by(year, month, day, h24Block) %>%
    summarize(n=sum(n), p6Hour=sum(p6Hour), p24Hour=sum(p24Hour)) %>%
    filter(abs(p24Hour-p6Hour) > 0.025)

# For each day, flag whether there is a 6-1, 24-6, and 24-1 issue
sixOneIssue <- sixVone %>%
    group_by(year, month, day) %>%
    summarize(n6_1=n(), p6_1=abs(sum(p1Hour)-sum(p6Hour)))

twentyfourSixIssue <- twentyfourVsix %>%
    group_by(year, month, day) %>%
    summarize(n6_24=n(), p6_24=abs(sum(p24Hour)-sum(p6Hour)))

twentyfourOneIssue <- twentyfourVone %>%
    group_by(year, month, day) %>%
    summarize(n1_24=n(), p1_24=abs(sum(p24Hour)-sum(p1Hour)))

allIssues <- sixOneIssue %>%
    full_join(twentyfourSixIssue) %>%
    full_join(twentyfourOneIssue) %>%
    arrange(year, month, day) %>%
    select(year, month, day, n6_1, n6_24, n1_24, p6_1, p6_24, p1_24)
## Joining, by = c("year", "month", "day")
## Joining, by = c("year", "month", "day")
allIssues
## # A tibble: 13 x 9
## # Groups:   year, month [6]
##     year month   day  n6_1 n6_24 n1_24   p6_1 p6_24  p1_24
##    <dbl> <dbl> <int> <int> <int> <int>  <dbl> <dbl>  <dbl>
##  1  2015     2     1     1     1     1  0.530  2.19  2.72 
##  2  2015     2     2     1    NA     1  0.88  NA     0.88 
##  3  2015     3    30    NA     1     1 NA      0.09  0.09 
##  4  2015     4     2     1    NA     1  0.03  NA     0.03 
##  5  2015    10    20     1     1     1  0.100  0.26  0.160
##  6  2015    10    21     1     1    NA  0.06   0.08 NA    
##  7  2015    10    24    NA     1     1 NA      0.12  0.12 
##  8  2015    10    27     3     1     1  0.05   0.32  0.37 
##  9  2015    10    28     3     1     1  0.35   0.66  0.31 
## 10  2015    10    31     2     1    NA  0.66   0.66 NA    
## 11  2015    12    14     1    NA     1  0.06  NA     0.06 
## 12  2015    12    28    NA     1     1 NA      1.25  1.26 
## 13  2016     1     1    NA     1     1 NA      0.06  0.06
dfPrecipHourly %>%
    filter(year==2015, month==2, day==1) %>%
    select(tod, p1Hour, p6Hour, p24Hour, h6SumHourly, h24SumHourly, h24Sum6Hourly) %>%
    as.data.frame()
##    tod p1Hour p6Hour p24Hour h6SumHourly h24SumHourly h24Sum6Hourly
## 1    1   0.05   0.00    0.00        0.26         3.37          2.84
## 2    2   0.05   0.00    0.00        0.26         3.37          2.84
## 3    3   0.03   0.00    0.00        0.26         3.37          2.84
## 4    4   0.05   0.00    0.00        0.26         3.37          2.84
## 5    5   0.05   0.00    0.00        0.26         3.37          2.84
## 6    6   0.03   0.26    0.00        0.26         3.37          2.84
## 7    7   0.04   0.00    0.00        0.49         3.37          2.84
## 8    8   0.07   0.00    0.00        0.49         3.37          2.84
## 9    9   0.26   0.00    0.00        0.49         3.37          2.84
## 10  10   0.00   0.00    0.00        0.49         3.37          2.84
## 11  11   0.01   0.00    0.00        0.49         3.37          2.84
## 12  12   0.11   0.49    0.00        0.49         3.37          2.84
## 13  13   0.43   0.00    0.00        2.04         3.37          2.84
## 14  14   0.04   0.00    0.00        2.04         3.37          2.84
## 15  15   0.77   0.00    0.00        2.04         3.37          2.84
## 16  16   0.29   0.00    0.00        2.04         3.37          2.84
## 17  17   0.31   0.00    0.00        2.04         3.37          2.84
## 18  18   0.20   2.04    0.00        2.04         3.37          2.84
## 19  19   0.18   0.00    0.00        0.58         3.37          2.84
## 20  20   0.00   0.00    0.00        0.58         3.37          2.84
## 21  21   0.02   0.00    0.00        0.58         3.37          2.84
## 22  22   0.33   0.00    0.00        0.58         3.37          2.84
## 23  23   0.03   0.00    0.00        0.58         3.37          2.84
## 24  24   0.02   0.05    0.65        0.58         3.37          2.84
dfPrecipHourly %>%
    filter(year==2015, month==2, day==2) %>%
    select(tod, p1Hour, p6Hour, p24Hour, h6SumHourly, h24SumHourly, h24Sum6Hourly) %>%
    as.data.frame()
##    tod p1Hour p6Hour p24Hour h6SumHourly h24SumHourly h24Sum6Hourly
## 1    1   0.00      0       0        0.88         0.88             0
## 2    2   0.88      0       0        0.88         0.88             0
## 3    3   0.00      0       0        0.88         0.88             0
## 4    4   0.00      0       0        0.88         0.88             0
## 5    5   0.00      0       0        0.88         0.88             0
## 6    6   0.00      0       0        0.88         0.88             0
## 7    7   0.00      0       0        0.00         0.88             0
## 8    8   0.00      0       0        0.00         0.88             0
## 9    9   0.00      0       0        0.00         0.88             0
## 10  10   0.00      0       0        0.00         0.88             0
## 11  11   0.00      0       0        0.00         0.88             0
## 12  12   0.00      0       0        0.00         0.88             0
## 13  13   0.00      0       0        0.00         0.88             0
## 14  14   0.00      0       0        0.00         0.88             0
## 15  15   0.00      0       0        0.00         0.88             0
## 16  16   0.00      0       0        0.00         0.88             0
## 17  17   0.00      0       0        0.00         0.88             0
## 18  18   0.00      0       0        0.00         0.88             0
## 19  19   0.00      0       0        0.00         0.88             0
## 20  20   0.00      0       0        0.00         0.88             0
## 21  21   0.00      0       0        0.00         0.88             0
## 22  22   0.00      0       0        0.00         0.88             0
## 23  23   0.00      0       0        0.00         0.88             0
## 24  24   0.00      0       0        0.00         0.88             0
dfPrecipHourly %>%
    filter(year==2015, month==10, day==20) %>%
    select(tod, p1Hour, p6Hour, p24Hour, h6SumHourly, h24SumHourly, h24Sum6Hourly) %>%
    as.data.frame()
##    tod p1Hour p6Hour p24Hour h6SumHourly h24SumHourly h24Sum6Hourly
## 1    1   0.00   0.00    0.00        0.00         0.28          0.38
## 2    2   0.00   0.00    0.00        0.00         0.28          0.38
## 3    3   0.00   0.00    0.00        0.00         0.28          0.38
## 4    4   0.00   0.00    0.00        0.00         0.28          0.38
## 5    5   0.00   0.00    0.00        0.00         0.28          0.38
## 6    6   0.00   0.00    0.00        0.00         0.28          0.38
## 7    7   0.00   0.00    0.00        0.00         0.28          0.38
## 8    8   0.00   0.00    0.00        0.00         0.28          0.38
## 9    9   0.00   0.00    0.00        0.00         0.28          0.38
## 10  10   0.00   0.00    0.00        0.00         0.28          0.38
## 11  11   0.00   0.00    0.00        0.00         0.28          0.38
## 12  12   0.00   0.00    0.00        0.00         0.28          0.38
## 13  13   0.01   0.00    0.00        0.27         0.28          0.38
## 14  14   0.00   0.00    0.00        0.27         0.28          0.38
## 15  15   0.11   0.00    0.00        0.27         0.28          0.38
## 16  16   0.15   0.00    0.00        0.27         0.28          0.38
## 17  17   0.00   0.00    0.00        0.27         0.28          0.38
## 18  18   0.00   0.37    0.00        0.27         0.28          0.38
## 19  19   0.01   0.00    0.00        0.01         0.28          0.38
## 20  20   0.00   0.00    0.00        0.01         0.28          0.38
## 21  21   0.00   0.00    0.00        0.01         0.28          0.38
## 22  22   0.00   0.00    0.00        0.01         0.28          0.38
## 23  23   0.00   0.00    0.00        0.01         0.28          0.38
## 24  24   0.00   0.01    0.12        0.01         0.28          0.38
dfPrecipHourly %>%
    filter(year==2015, month==10, day==21) %>%
    select(tod, p1Hour, p6Hour, p24Hour, h6SumHourly, h24SumHourly, h24Sum6Hourly) %>%
    as.data.frame()
##    tod p1Hour p6Hour p24Hour h6SumHourly h24SumHourly h24Sum6Hourly
## 1    1   0.00   0.00       0        0.00         0.02          0.08
## 2    2   0.00   0.00       0        0.00         0.02          0.08
## 3    3   0.00   0.00       0        0.00         0.02          0.08
## 4    4   0.00   0.00       0        0.00         0.02          0.08
## 5    5   0.00   0.00       0        0.00         0.02          0.08
## 6    6   0.00   0.00       0        0.00         0.02          0.08
## 7    7   0.00   0.00       0        0.02         0.02          0.08
## 8    8   0.00   0.00       0        0.02         0.02          0.08
## 9    9   0.00   0.00       0        0.02         0.02          0.08
## 10  10   0.02   0.00       0        0.02         0.02          0.08
## 11  11   0.00   0.00       0        0.02         0.02          0.08
## 12  12   0.00   0.08       0        0.02         0.02          0.08
## 13  13   0.00   0.00       0        0.00         0.02          0.08
## 14  14   0.00   0.00       0        0.00         0.02          0.08
## 15  15   0.00   0.00       0        0.00         0.02          0.08
## 16  16   0.00   0.00       0        0.00         0.02          0.08
## 17  17   0.00   0.00       0        0.00         0.02          0.08
## 18  18   0.00   0.00       0        0.00         0.02          0.08
## 19  19   0.00   0.00       0        0.00         0.02          0.08
## 20  20   0.00   0.00       0        0.00         0.02          0.08
## 21  21   0.00   0.00       0        0.00         0.02          0.08
## 22  22   0.00   0.00       0        0.00         0.02          0.08
## 23  23   0.00   0.00       0        0.00         0.02          0.08
## 24  24   0.00   0.00       0        0.00         0.02          0.08
allIssues %>%
    filter(is.na(p6_1) | is.na(p6_24) | is.na(p1_24))
## # A tibble: 9 x 9
## # Groups:   year, month [6]
##    year month   day  n6_1 n6_24 n1_24  p6_1 p6_24 p1_24
##   <dbl> <dbl> <int> <int> <int> <int> <dbl> <dbl> <dbl>
## 1  2015     2     2     1    NA     1  0.88 NA     0.88
## 2  2015     3    30    NA     1     1 NA     0.09  0.09
## 3  2015     4     2     1    NA     1  0.03 NA     0.03
## 4  2015    10    21     1     1    NA  0.06  0.08 NA   
## 5  2015    10    24    NA     1     1 NA     0.12  0.12
## 6  2015    10    31     2     1    NA  0.66  0.66 NA   
## 7  2015    12    14     1    NA     1  0.06 NA     0.06
## 8  2015    12    28    NA     1     1 NA     1.25  1.26
## 9  2016     1     1    NA     1     1 NA     0.06  0.06
allIssues %>%
    filter(!(is.na(p6_1) | is.na(p6_24) | is.na(p1_24)))
## # A tibble: 4 x 9
## # Groups:   year, month [2]
##    year month   day  n6_1 n6_24 n1_24  p6_1 p6_24 p1_24
##   <dbl> <dbl> <int> <int> <int> <int> <dbl> <dbl> <dbl>
## 1  2015     2     1     1     1     1 0.530  2.19 2.72 
## 2  2015    10    20     1     1     1 0.100  0.26 0.160
## 3  2015    10    27     3     1     1 0.05   0.32 0.37 
## 4  2015    10    28     3     1     1 0.35   0.66 0.31

Looking at February 1, 2015 as an example shows the issue may not be easy to track down. There is no obvious single entry error driving the large disconnects. Rather, the 6-hourly observations seem somewhat tethered to the hourly observations while the 24-hour observation is very different than the sum of either the 1-hourly observations or the 6-hourly observations.

Looking at February 2, 2015, there is a single anomalous record in the p1Hour column that likely accounts for the full disconnect. Checking official precipitation records may be helpful to vet that there was no precipitation on this day.

Looking at October 20, 2015, there is a possible anomaly around hour 16 (not in 24-hour total) and hour 18 (sums to 0.1 greater than hourly totals) that drive the disconnect.

Looking at Octiber 21, 2015, there is a possible anomaly at hours 10 and 12 where precipitations are recorded (with 1-hour and 6-hour not aligned) despite nothing in the 24-hour total.

Further investigation such as checking the official precipitation records may be needed.

Example #40: External Source for Precipitation Checking

Data are maintained for Chicago precipitation totals by month at https://www.weather.gov/lot/

While the table is not ideally formatted, it appears possible to pull down the data.

Example code includes:

# cached to avoid repeated hits against the weather.gov server

# # Define the website and xpath for the table
# webSite <- 'https://www.weather.gov/lot/July_Precip_Rankings_Chicago'
# webXPath <- '//*[@id=\"pagebody\"]/div[3]/div/table[2]'
# 
# # Pull down the table
# webData <- webSite %>% 
#     xml2::read_html() %>% 
#     rvest::html_nodes(xpath=webXPath) %>% 
#     rvest::html_table(fill=TRUE)
# 
# # Data appear to be stored in a list containing a frame
# webDF <- webData[[1]]
# 
# # Row 1 appears to have the data, but the first two columns are filler
# webUse <- webDF[1, 3:ncol(webDF)]
# webUse
# 
# # The order of the data are a recurrence of (Precip - Year - NA) (Precip - Year - NA) (Precip - Year)
# # So, for each 8 columns, precipitation will be 1-4-7 and year will be 2-5-8
# maxRead <- ncol(webUse) %/% 8
# precipBase <- 8*(1:maxRead) - 7
# yearBase <- 8*(1:maxRead) - 6
# 
# precipData <- webUse[, c(precipBase, precipBase+3, precipBase+6)]
# yearData <- webUse[, c(yearBase, yearBase+3, yearBase+6)]
# 
# dfPrecip <- tibble::tibble(precipData=as.vector(as.matrix(precipData)), 
#                            yearData=as.vector(as.matrix(yearData))
#                            )
# dfPrecip


# Extend to Function for all 12 months
getPrecipMonthlyData <- function(webSite, webXPath) {
    
    # Pull down the table
    webData <- webSite %>% 
        xml2::read_html() %>% 
        rvest::html_nodes(xpath=webXPath) %>% 
        rvest::html_table(fill=TRUE)

    # Data appear to be stored in a list containing a frame
    webDF <- webData[[1]]

    # Row 1 appears to have the data, but the first two columns are filler
    webUse <- webDF[1, 3:ncol(webDF)]

    # The order of the data are a recurrence of (Precip - Year - NA) (Precip - Year - NA) (Precip - Year)
    # So, for each 8 columns, precipitation will be 1-4-7 and year will be 2-5-8
    maxRead <- ncol(webUse) %/% 8
    precipBase <- 8*(1:maxRead) - 7
    yearBase <- 8*(1:maxRead) - 6

    precipData <- webUse[, c(precipBase, precipBase+3, precipBase+6)]
    yearData <- webUse[, c(yearBase, yearBase+3, yearBase+6)]

    dfPrecip <- tibble::tibble(precipData=as.vector(as.matrix(precipData)), 
                               yearData=as.vector(as.matrix(yearData))
                               )
    dfPrecip

}

getPrecipMonthlyData(webSite='https://www.weather.gov/lot/January_Precip_Rankings_Chicago', 
                     webXPath='//*[@id=\"pagebody\"]/div[3]/div/table[2]'
                     )
## # A tibble: 150 x 2
##    precipData yearData
##         <dbl>    <dbl>
##  1      0.1       1981
##  2      0.2       1919
##  3      0.26      1961
##  4      0.36      2003
##  5      0.38      1956
##  6      0.39      1986
##  7      0.54      1879
##  8      0.570     1931
##  9      0.580     1899
## 10      0.66      1902
## # ... with 140 more rows
listPrecip <- vector("list", 12)
for (x in 1:12) {
    webSite <- paste0("https://www.weather.gov/lot/", month.name[x], "_Precip_Rankings_Chicago")
    if (x %in% c(3, 5, 7)) { 
        listPrecip[[x]] <- NA
        cat("\nPassed on pulling data for x = ", x)
    }
    else {
        listPrecip[[x]] <- getPrecipMonthlyData(webSite, 
                                                webXPath='//*[@id=\"pagebody\"]/div[3]/div/table[2]'
                                                )
        cat("\nPulled data for x =", x)
    }
}
## 
## Pulled data for x = 1
## Pulled data for x = 2
## Passed on pulling data for x =  3
## Pulled data for x = 4
## Passed on pulling data for x =  5
## Pulled data for x = 6
## Passed on pulling data for x =  7
## Pulled data for x = 8
## Pulled data for x = 9
## Pulled data for x = 10
## Pulled data for x = 11
## Pulled data for x = 12

Data for March, May, and July have a different formatting and could not parse with the automated routine. The remaining data can be bound in to a single data frame:

listPrecip[[3]] <- data.frame(precipData=NA, yearData=NA)
listPrecip[[5]] <- data.frame(precipData=NA, yearData=NA)
listPrecip[[7]] <- data.frame(precipData=NA, yearData=NA)

officialPrecip <- bind_rows(listPrecip, .id="month")

officialPrecip %>%
    filter(yearData==2016)
## # A tibble: 9 x 3
##   month precipData yearData
##   <chr>      <dbl>    <dbl>
## 1 1           0.84     2016
## 2 2           1.23     2016
## 3 4           2.8      2016
## 4 6           2.85     2016
## 5 8           4.26     2016
## 6 9           1.76     2016
## 7 10          3.77     2016
## 8 11          1.69     2016
## 9 12          1.77     2016

Example #41: Examining Wind Direction by Season

Winds tend to change with month as can be seen in the METAR data.

Example code includes:

wdTypes <- c("Error", "None", "Variable", 
             "NNW", "N", "NNE", 
             "ENE", "E", "ESE", 
             "SSE", "S", "SSW", 
             "WSW", "W", "WNW"
             )

plotWindData <- function(lst, subT, extraVars=NULL, showPlots=TRUE) {

    baseVars <- c("valid", "dirW", "spdW")
    if (!(is.null(extraVars))) {
        allVars <- c(baseVars, extraVars)
    } else {
        allVars <- baseVars
    }
    
    # Extract the wind data
    # Pull the wind speed and wind direction, and classify accordingly
    windData <- lst[["fullMETAR"]] %>%
        rename(valid=dtime, dirW=WindDir, spdW=WindSpeed) %>%
        select_at(vars(all_of(allVars))) %>%
        filter(complete.cases(.)) %>%
        mutate(spdBucket=factor(case_when(spdW==0 ~ "None", 
                                          spdW<=6 ~ "Light", 
                                          spdW<=12 ~ "Moderate", 
                                          TRUE ~ "Strong"
                                          ), 
                                levels=c("Strong", "Moderate", "Light", "None")
                                ), 
               wd=factor(case_when(dirW=="VRB" ~ "Variable", 
                                   dirW=="000" ~ "None", 
                                   dirW %in% c("350", "360", "010") ~ "N", 
                                   dirW %in% c("020", "030", "040") ~ "NNE", 
                                   dirW %in% c("050", "060", "070") ~ "ENE", 
                                   dirW %in% c("080", "090", "100") ~ "E",
                                   dirW %in% c("110", "120", "130") ~ "ESE", 
                                   dirW %in% c("140", "150", "160") ~ "SSE", 
                                   dirW %in% c("170", "180", "190") ~ "S", 
                                   dirW %in% c("200", "210", "220") ~ "SSW", 
                                   dirW %in% c("230", "240", "250") ~ "WSW", 
                                   dirW %in% c("260", "270", "280") ~ "W", 
                                   dirW %in% c("290", "300", "310") ~ "WNW", 
                                   dirW %in% c("320", "330", "340") ~ "NNW", 
                                   TRUE ~ "Error"
                                   ), 
                         levels=wdTypes
                         ), 
               predomDir=factor(case_when(wd=="Variable" ~ "Variable", 
                                          wd=="None" ~ "None", 
                                          wd %in% c("NNW", "N", "NNE") ~ "North",
                                          wd %in% c("ENE", "E", "ESE") ~ "East", 
                                          wd %in% c("SSE", "S", "SSW") ~ "South", 
                                          wd %in% c("WSW", "W", "WNW") ~ "West"
                                          ), 
                                levels=c("North", "East", "South", "West", "Variable", "None")
                                ),
               month=lubridate::month(valid), 
               monthfct=factor(month.abb[month], levels=month.abb[1:12])
               )

    # Summary of wind speeds and directions
    windData %>% 
        count(spdBucket, wd) %>% 
        pivot_wider(id_cols=c("wd"), names_from="spdBucket", values_from="n") %>%
        print()

    # Plot of wind speeds and directions - stacked bars
    p1 <- windData %>%
        ggplot(aes(x=wd, fill=spdBucket)) + 
        geom_bar() + 
        labs(x="Wind Direction", y="# Hourly Observations", 
             title="Wind Speed by Wind Direction", subtitle=subT
             ) + 
        scale_fill_discrete("Wind Speed")
    if (showPlots) print(p1)
    
    # Plot of wind speeds and directions - filled bars
    p2 <- windData %>%
        ggplot(aes(x=wd, fill=spdBucket)) + 
        geom_bar(position="fill") + 
        labs(x="Wind Direction", y="% Hourly Observations", 
             title="Wind Speed by Wind Direction", subtitle=subT
             ) + 
        scale_fill_discrete("Wind Speed")
    if (showPlots) print(p2)

    # Plot of wind speeds and predominant directions - filled bars
    p3 <- windData %>%
        filter(spdBucket != "None") %>%
        ggplot(aes(x=spdBucket, fill=predomDir)) + 
        geom_bar(position="fill") + 
        labs(x="Wind Speed", y="% Hourly Observations", 
             title="Wind Speed by Wind Direction", subtitle=subT
             ) + 
        scale_fill_discrete("Wind Direction")
    if (showPlots) print(p3)
    
    # Plot of months and wind speeds - filled bars
    p4 <- windData %>%
        ggplot(aes(x=monthfct, fill=spdBucket)) + 
        geom_bar(position="fill") + 
        labs(x="", y="% Hourly Observations", 
             title="Wind Speed by Month", subtitle=subT
             ) + 
        scale_fill_discrete("Wind Speed")
    if (showPlots) print(p4)
    
    # Plot of months and wind directions - filled bars
    p5 <- windData %>%
        ggplot(aes(x=monthfct, fill=predomDir)) + 
        geom_bar(position="fill") + 
        labs(x="", y="% Hourly Observations", 
             title="Wind Direction by Month", subtitle=subT
             ) + 
        scale_fill_discrete("Wind Direction")
    if (showPlots) print(p5)
    
    windData    
}

# Run for Lincoln, NE (2016)
klnk2016Wind <- plotWindData(klnk2016METAR, subT="Lincoln, NE (2016)")
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         351      324   213    NA
##  2 N           272      378   415    NA
##  3 NNE          62      240   178    NA
##  4 ENE          47      158   118    NA
##  5 E            16      117   122    NA
##  6 ESE          64      300   208    NA
##  7 SSE         245      455   342    NA
##  8 S           454      631   409    NA
##  9 SSW         193      262   150    NA
## 10 WSW          30      105   114    NA
## 11 W            43      133   139    NA
## 12 WNW         150      207   156    NA
## 13 Variable     NA       NA   114    NA
## 14 None         NA       NA    NA   875

# Run for Las Vegas, NV (2016)
klas2016Wind <- plotWindData(klas2016METAR, subT="Las Vegas, NV (2016)")
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         159      102    85    NA
##  2 N            56      140   172    NA
##  3 NNE          55      202   235    NA
##  4 ENE          35      189   301    NA
##  5 E             7      128   254    NA
##  6 ESE           3       35   140    NA
##  7 SSE          14      161   181    NA
##  8 S           140      617   605    NA
##  9 SSW         215      405  1029    NA
## 10 WSW         110      196   377    NA
## 11 W            25       52   270    NA
## 12 WNW          17       47   127    NA
## 13 Variable     NA       NA   503    NA
## 14 None         NA       NA    NA  1394

# Run for Chicago, IL (2016)
kord2016Wind <- plotWindData(kord2016METAR, subT="Chicago, IL (2016)")
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         166      274   176    NA
##  2 N            80      226   245    NA
##  3 NNE         127      352   179    NA
##  4 ENE          56      333   172    NA
##  5 E            27      274   170    NA
##  6 ESE          37      145    84    NA
##  7 SSE          42      263   201    NA
##  8 S           179      427   302    NA
##  9 SSW         217      406   298    NA
## 10 WSW         179      429   333    NA
## 11 W           286      490   265    NA
## 12 WNW         193      361   164    NA
## 13 Variable     NA       NA   137    NA
## 14 None         NA       NA    NA   510

Lincoln, NE 2016 findings:

  • Winds are frequently from the North (NNW, N) and South (SSE, S), particularly with Strong winds
  • Winds are more often strong in late winter and early spring, peaking around April
  • North winds are more common in February while South winds are more common in summer

Las Vegas, NV 2016 findings:

  • Winds are predominatly either calm or from the South (S, SSW, WSW)
  • Winds from the NNW, while rare, were strong nearly half the time they occurred
  • Winds are generally calm or light from November to February, with moderate winds becoming more common during the summer months
  • Winds from the south are especially common in May to July

Chicago, IL 2016 findings:

  • Winds are faily evenly distributed by direction, though with troughs near East and spikes near West
  • Strong winds are less likely to be from the East and more likely to be from the West than moderate or light winds
  • Winds are more likely to be strong in winter and to be light in summer
  • Wind directions change meaningfully, with East being rare in the winter and more common in spring-summer; and with West being more common in winter

Example #42: Predicting Locale Based on Wind Data

Are there sufficient differences in wind by month to make predictions as to which locale the data are sampled from? A random forest can provide a first cut at assessing that.

Example code includes:

windData <- bind_rows(klnk=klnk2016Wind, klas=klas2016Wind, kord=kord2016Wind, .id="source") %>%
    mutate(source=factor(source))
windData
## # A tibble: 26,378 x 9
##    source valid               dirW   spdW spdBucket wd    predomDir month
##    <fct>  <dttm>              <chr> <int> <fct>     <fct> <fct>     <dbl>
##  1 klnk   2015-12-31 00:54:00 300       5 Light     WNW   West         12
##  2 klnk   2015-12-31 01:54:00 000       0 None      None  None         12
##  3 klnk   2015-12-31 02:54:00 000       0 None      None  None         12
##  4 klnk   2015-12-31 03:54:00 280       3 Light     W     West         12
##  5 klnk   2015-12-31 04:54:00 310       5 Light     WNW   West         12
##  6 klnk   2015-12-31 05:54:00 010       9 Moderate  N     North        12
##  7 klnk   2015-12-31 06:54:00 000       0 None      None  None         12
##  8 klnk   2015-12-31 07:54:00 010       3 Light     N     North        12
##  9 klnk   2015-12-31 08:54:00 000       0 None      None  None         12
## 10 klnk   2015-12-31 09:54:00 000       0 None      None  None         12
## # ... with 26,368 more rows, and 1 more variable: monthfct <fct>
set.seed(2005061331)
trainIdx <- sample(1:nrow(windData), round(0.7*nrow(windData), 0), replace=FALSE) %>% sort()

trainData <- windData[trainIdx, ]
testData <- windData[-trainIdx, ]

# Attempt a basic random forest on just the speed bucket, wind direction, and month
rfInit <- trainData %>%
    select(source, spdBucket, wd, monthfct) %>%
    randomForest::randomForest(source ~ spdBucket + wd + monthfct, data=.)
rfInit
## 
## Call:
##  randomForest(formula = source ~ spdBucket + wd + monthfct, data = .) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 1
## 
##         OOB estimate of  error rate: 47.03%
## Confusion matrix:
##      klas klnk kord class.error
## klas 3916 1206 1013   0.3616952
## klnk 1774 3207 1173   0.4788755
## kord 1777 1741 2658   0.5696244
# Attempt a basic random forest on just the speed bucket, wind direction, and month
rfmtry2 <- trainData %>%
    select(source, spdBucket, wd, monthfct) %>%
    randomForest::randomForest(source ~ spdBucket + wd + monthfct, data=., mtry=2)
rfmtry2
## 
## Call:
##  randomForest(formula = source ~ spdBucket + wd + monthfct, data = .,      mtry = 2) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 46%
## Confusion matrix:
##      klas klnk kord class.error
## klas 3906 1157 1072   0.3633252
## klnk 1694 3182 1278   0.4829379
## kord 1632 1661 2883   0.5331930
# Predictions and confusion matrices on test data
evalWindPredictions <- function(model, 
                                testData, 
                                printAll=TRUE,
                                printCM=printAll, 
                                printConfSummary=printAll, 
                                printConfTable=printAll, 
                                showPlots=TRUE
                                ) {
    
    # Get the predicted class and probabilities
    testClass <- predict(model, newdata=testData)
    testProbs <- predict(model, newdata=testData, type="prob")
    if (printCM) {
        print(caret::confusionMatrix(testClass, testData$source))
    }
    
    # Create a tibble containing class prediction, maximum probability, and individual predictions
    tblProbs <- tibble::as_tibble(testProbs) %>%
        mutate(maxProb=apply(., 1, FUN=max), 
               sumProb=apply(., 1, FUN=sum), 
               predClass=testClass, 
               source=testData$source, 
               accurate=(predClass==source)
               )
    
    # Describe the maximum probability by source
    if (printConfSummary) {
        tblProbs %>%
            group_by(source) %>%
            summarize(meanMax=mean(maxProb), medianMax=median(maxProb), 
                      pct90Plus=mean(maxProb > 0.9), pct50Minus=mean(maxProb < 0.5)
                      ) %>%
        print()
    }
    
    # Create a table of accuracy by source and prediction confidence
    p1Data <- tblProbs %>%
        mutate(predProb=0.5 * round(2*maxProb, 1)) %>%
        group_by(predProb, source) %>%
        summarize(pctCorrect=mean(accurate), nCorrect=sum(accurate), nObs=n())
    
    p1Print <- p1Data %>%
        group_by(predProb) %>%
        summarize(nCorrect=sum(nCorrect), nObs=sum(nObs)) %>%
        mutate(pctCorrect=nCorrect/nObs)
    if (printConfTable) {
        print(p1Print)
    }
    
    cat("\nMean Error-Squared Between Confidence of Prediction and Accuracy of Precition\n")
    p1Print %>%
        mutate(err2=nObs*(pctCorrect-predProb)**2) %>%
        summarize(meanError2=sum(err2)/sum(nObs)) %>%
        print()
    
    # Plot the maximum probability forecasted by row
    p1 <- p1Data %>%
        ggplot(aes(x=predProb)) +
        geom_col(aes(y=nObs, fill=source)) + 
        labs(x="Maximum probability predicted", y="# Observations", 
             title="Count of Maximum Probability Predicted by Locale"
             )
    p2 <- p1Data %>%
        ggplot(aes(x=predProb)) +
        geom_line(aes(y=pctCorrect, group=source, color=source)) + 
        geom_abline(aes(intercept=0, slope=1), lty=2) +
        ylim(c(0, 1)) + 
        labs(x="Maximum probability predicted", y="Actual Probability Correct", 
             title="Accuracy of Maximum Probability Predicted by Locale"
             )
    
    if (showPlots) {
        print(p1)
        print(p2)
    }
    
    tblProbs
    
}

rfInitAccData <- evalWindPredictions(rfInit, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 1639  751  726
##       klnk  532 1398  749
##       kord  477  487 1154
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5296          
##                  95% CI : (0.5186, 0.5407)
##     No Information Rate : 0.3346          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2943          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6190      0.5303      0.4390
## Specificity               0.7195      0.7572      0.8176
## Pos Pred Value            0.5260      0.5218      0.5449
## Neg Pred Value            0.7897      0.7635      0.7455
## Prevalence                0.3346      0.3331      0.3322
## Detection Rate            0.2071      0.1767      0.1458
## Detection Prevalence      0.3938      0.3386      0.2677
## Balanced Accuracy         0.6692      0.6438      0.6283
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.788     0.822     0.410     0.0940
## 2 klnk     0.752     0.752     0.275     0.0941
## 3 kord     0.763     0.772     0.319     0.0795
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35       15    29      0.517
##  2     0.4        51   112      0.455
##  3     0.45      121   307      0.394
##  4     0.5       189   553      0.342
##  5     0.55      144   334      0.431
##  6     0.6       285   620      0.460
##  7     0.65      243   516      0.471
##  8     0.7       224   452      0.496
##  9     0.75      504   996      0.506
## 10     0.8       356   673      0.529
## 11     0.85      302   541      0.558
## 12     0.9       414   722      0.573
## 13     0.95      501   838      0.598
## 14     1         842  1220      0.690
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0659

rfmtry2AccData <- evalWindPredictions(rfmtry2, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 1646  726  662
##       klnk  509 1366  711
##       kord  493  544 1256
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5394          
##                  95% CI : (0.5283, 0.5504)
##     No Information Rate : 0.3346          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3089          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6216      0.5182      0.4777
## Specificity               0.7364      0.7688      0.8037
## Pos Pred Value            0.5425      0.5282      0.5478
## Neg Pred Value            0.7946      0.7616      0.7557
## Prevalence                0.3346      0.3331      0.3322
## Detection Rate            0.2080      0.1726      0.1587
## Detection Prevalence      0.3834      0.3268      0.2898
## Balanced Accuracy         0.6790      0.6435      0.6407
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.864     0.955     0.608     0.0276
## 2 klnk     0.842     0.918     0.525     0.0266
## 3 kord     0.840     0.922     0.533     0.0350
## # A tibble: 13 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.4        18    32      0.562
##  2     0.45       41   111      0.369
##  3     0.5        88   237      0.371
##  4     0.55      248   589      0.421
##  5     0.6       154   344      0.448
##  6     0.65      127   307      0.414
##  7     0.7       154   375      0.411
##  8     0.75      167   343      0.487
##  9     0.8       199   397      0.501
## 10     0.85      238   496      0.480
## 11     0.9       329   619      0.532
## 12     0.95      666  1244      0.535
## 13     1        1839  2819      0.652
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1      0.106

So, a basic random forest gets to 54% accuracy with the null accuracy being 34%. However, the model is very over-confident in many of its predictions, perhaps driven by so many permutations of the factors (12 months, 12 wind directions, 3 wind speeds) with so few days. So, rather than learning light and strong winds, the model may just be memorizing the single data point in certain grids.

Can accuracy or at least over-confidence be addressed by allowing the model to use the actual numeric wind speed?

# Attempt a basic random forest on just the speed bucket, wind direction, and month
rfmtry2Num <- trainData %>%
    select(source, spdW, wd, monthfct) %>%
    randomForest::randomForest(source ~ spdW + wd + monthfct, data=., mtry=2)
rfmtry2Num
## 
## Call:
##  randomForest(formula = source ~ spdW + wd + monthfct, data = .,      mtry = 2) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 46.29%
## Confusion matrix:
##      klas klnk kord class.error
## klas 3866 1221 1080   0.3731150
## klnk 1668 3025 1463   0.5086095
## kord 1542 1583 3037   0.5071405
# Print accuracy and confidence
rfmtry2NumAccData <- evalWindPredictions(rfmtry2Num, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 1585  677  624
##       klnk  561 1304  703
##       kord  486  657 1325
## 
## Overall Statistics
##                                          
##                Accuracy : 0.5319         
##                  95% CI : (0.5209, 0.543)
##     No Information Rate : 0.3348         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.298          
##                                          
##  Mcnemar's Test P-Value : 1.69e-06       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6022      0.4943      0.4996
## Specificity               0.7541      0.7608      0.7831
## Pos Pred Value            0.5492      0.5078      0.5369
## Neg Pred Value            0.7921      0.7508      0.7567
## Prevalence                0.3322      0.3330      0.3348
## Detection Rate            0.2001      0.1646      0.1673
## Detection Prevalence      0.3643      0.3242      0.3115
## Balanced Accuracy         0.6781      0.6276      0.6414
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.801     0.86      0.445     0.0718
## 2 klnk     0.764     0.777     0.303     0.0697
## 3 kord     0.773     0.792     0.337     0.0705
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35        7    12      0.583
##  2     0.4        52   127      0.409
##  3     0.45      104   271      0.384
##  4     0.5       172   434      0.396
##  5     0.55      245   614      0.399
##  6     0.6       252   575      0.438
##  7     0.65      231   500      0.462
##  8     0.7       242   516      0.469
##  9     0.75      300   606      0.495
## 10     0.8       286   565      0.506
## 11     0.85      297   541      0.549
## 12     0.9       347   651      0.533
## 13     0.95      564   896      0.629
## 14     1        1115  1614      0.691
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0697

The model is a touch less over-confident, though overall prediction accuracy remains at 54%.

So, still much work to do to make more meaningful predictions. There is a lot of overlap among the wind data by city, so it is surprising that the model has so many high-confidence predictions that do not actually work out.

Further avenues to explore include 1) a lot more data for each city, and 2) further compressing the levels for the factors so that trends are learned rather than individual data points being memorized.

Example #43: Further Predicting Locale Based on Wind Data

Is the model appropriately confident on the training data?

Example code includes:

library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
rfInitAccDataTrain <- evalWindPredictions(rfInit, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 3942 1760 1760
##       klnk 1202 3242 1718
##       kord  991 1152 2698
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5352          
##                  95% CI : (0.5279, 0.5424)
##     No Information Rate : 0.3345          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3029          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6425      0.5268      0.4369
## Specificity               0.7145      0.7628      0.8256
## Pos Pred Value            0.5283      0.5261      0.5573
## Neg Pred Value            0.8007      0.7633      0.7447
## Prevalence                0.3323      0.3333      0.3345
## Detection Rate            0.2135      0.1756      0.1461
## Detection Prevalence      0.4041      0.3337      0.2622
## Balanced Accuracy         0.6785      0.6448      0.6312
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.793     0.83      0.426     0.0980
## 2 klnk     0.759     0.77      0.274     0.0890
## 3 kord     0.762     0.772     0.316     0.0866
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35       35    88      0.398
##  2     0.4       106   281      0.377
##  3     0.45      264   722      0.366
##  4     0.5       422  1168      0.361
##  5     0.55      327   842      0.388
##  6     0.6       576  1258      0.458
##  7     0.65      527  1213      0.434
##  8     0.7       468  1020      0.459
##  9     0.75     1184  2286      0.518
## 10     0.8       936  1648      0.568
## 11     0.85      782  1342      0.583
## 12     0.9      1080  1773      0.609
## 13     0.95     1207  1978      0.610
## 14     1        1968  2846      0.691
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0622

rfmtry2AccDataTrain <- evalWindPredictions(rfmtry2, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 4027 1658 1597
##       klnk 1101 3296 1546
##       kord 1007 1200 3033
## 
## Overall Statistics
##                                          
##                Accuracy : 0.5608         
##                  95% CI : (0.5537, 0.568)
##     No Information Rate : 0.3345         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.3414         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6564      0.5356      0.4911
## Specificity               0.7360      0.7850      0.8204
## Pos Pred Value            0.5530      0.5546      0.5788
## Neg Pred Value            0.8115      0.7718      0.7623
## Prevalence                0.3323      0.3333      0.3345
## Detection Rate            0.2181      0.1785      0.1643
## Detection Prevalence      0.3944      0.3219      0.2838
## Balanced Accuracy         0.6962      0.6603      0.6558
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.868     0.962     0.616     0.0300
## 2 klnk     0.846     0.922     0.540     0.0314
## 3 kord     0.842     0.924     0.537     0.0327
## # A tibble: 13 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.4        28    75      0.373
##  2     0.45      100   280      0.357
##  3     0.5       210   526      0.399
##  4     0.55      544  1303      0.417
##  5     0.6       385   847      0.455
##  6     0.65      282   623      0.453
##  7     0.7       425   902      0.471
##  8     0.75      386   769      0.502
##  9     0.8       456   889      0.513
## 10     0.85      596  1183      0.504
## 11     0.9       790  1431      0.552
## 12     0.95     1543  2834      0.544
## 13     1        4611  6803      0.678
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0936

rfmtry2NumAccDataTrain <- evalWindPredictions(rfmtry2Num, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 4063 1462 1391
##       klnk 1110 3475 1352
##       kord  962 1217 3433
## 
## Overall Statistics
##                                          
##                Accuracy : 0.5942         
##                  95% CI : (0.587, 0.6012)
##     No Information Rate : 0.3345         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.3913         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6623      0.5647      0.5559
## Specificity               0.7686      0.8000      0.8227
## Pos Pred Value            0.5875      0.5853      0.6117
## Neg Pred Value            0.8206      0.7862      0.7866
## Prevalence                0.3323      0.3333      0.3345
## Detection Rate            0.2200      0.1882      0.1859
## Detection Prevalence      0.3745      0.3215      0.3039
## Balanced Accuracy         0.7154      0.6823      0.6893
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.811     0.874     0.460     0.0650
## 2 klnk     0.775     0.798     0.328     0.0658
## 3 kord     0.777     0.798     0.344     0.0661
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35       12    29      0.414
##  2     0.4        89   228      0.390
##  3     0.45      236   585      0.403
##  4     0.5       442  1016      0.435
##  5     0.55      610  1352      0.451
##  6     0.6       588  1218      0.483
##  7     0.65      584  1127      0.518
##  8     0.7       620  1181      0.525
##  9     0.75      759  1378      0.551
## 10     0.8       763  1303      0.586
## 11     0.85      816  1312      0.622
## 12     0.9      1058  1657      0.639
## 13     0.95     1486  2140      0.694
## 14     1        2908  3939      0.738
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0432

The accuracy is somewhat greater with the training data, and the confidence of the predictions is visually somewhat more aligned with the accuracy, though the model is over-confident even on the training data.

Forcing the trees to be less complex is an option to avoid over-fitting and over-confidence. As an example, with fewer trees and larger nodes:

# Attempt a basic random forest on just the speed bucket, wind direction, and month
rfSmaller <- trainData %>%
    select(source, spdW, wd, monthfct) %>%
    randomForest::randomForest(source ~ spdW + wd + monthfct, data=., mtry=2, nodesize=5)
rfSmaller
## 
## Call:
##  randomForest(formula = source ~ spdW + wd + monthfct, data = .,      mtry = 2, nodesize = 5) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 46.2%
## Confusion matrix:
##      klas klnk kord class.error
## klas 3898 1195 1074   0.3679261
## klnk 1702 2993 1461   0.5138077
## kord 1551 1557 3054   0.5043817
# Print accuracy and confidence
rfSmallerAccData <- evalWindPredictions(rfSmaller, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 4233 1490 1360
##       klnk 1026 3529 1264
##       kord  908 1137 3538
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6113          
##                  95% CI : (0.6042, 0.6183)
##     No Information Rate : 0.3336          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4169          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6864      0.5733      0.5742
## Specificity               0.7686      0.8143      0.8341
## Pos Pred Value            0.5976      0.6065      0.6337
## Neg Pred Value            0.8304      0.7926      0.7966
## Prevalence                0.3336      0.3330      0.3334
## Detection Rate            0.2290      0.1909      0.1914
## Detection Prevalence      0.3832      0.3148      0.3020
## Balanced Accuracy         0.7275      0.6938      0.7041
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.812     0.868     0.460     0.0624
## 2 klnk     0.774     0.804     0.321     0.0655
## 3 kord     0.777     0.802     0.344     0.0662
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35       14    40      0.35 
##  2     0.4        76   216      0.352
##  3     0.45      215   555      0.387
##  4     0.5       469  1073      0.437
##  5     0.55      667  1430      0.466
##  6     0.6       585  1190      0.492
##  7     0.65      561  1056      0.531
##  8     0.7       626  1110      0.564
##  9     0.75      743  1314      0.565
## 10     0.8       836  1369      0.611
## 11     0.85      912  1451      0.629
## 12     0.9       991  1463      0.677
## 13     0.95     1644  2300      0.715
## 14     1        2961  3918      0.756
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0360

rfSmallerAccData <- evalWindPredictions(rfSmaller, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 1599  696  639
##       klnk  557 1284  691
##       kord  476  658 1322
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5308          
##                  95% CI : (0.5197, 0.5418)
##     No Information Rate : 0.3348          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2963          
##                                           
##  Mcnemar's Test P-Value : 1.037e-08       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6075      0.4867      0.4985
## Specificity               0.7476      0.7638      0.7848
## Pos Pred Value            0.5450      0.5071      0.5383
## Neg Pred Value            0.7929      0.7488      0.7567
## Prevalence                0.3322      0.3330      0.3348
## Detection Rate            0.2018      0.1621      0.1669
## Detection Prevalence      0.3704      0.3196      0.3100
## Balanced Accuracy         0.6776      0.6253      0.6417
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.800     0.858     0.443     0.0733
## 2 klnk     0.763     0.784     0.293     0.0713
## 3 kord     0.772     0.794     0.331     0.0747
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35        4    24      0.167
##  2     0.4        45   109      0.413
##  3     0.45       85   261      0.326
##  4     0.5       163   442      0.369
##  5     0.55      283   649      0.436
##  6     0.6       246   582      0.423
##  7     0.65      234   509      0.460
##  8     0.7       223   458      0.487
##  9     0.75      279   572      0.488
## 10     0.8       289   572      0.505
## 11     0.85      372   660      0.564
## 12     0.9       320   568      0.563
## 13     0.95      594   973      0.610
## 14     1        1068  1543      0.692
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0687

The impact is modest. Perhaps restricting the maximum number of nodes can improve performance:

# Attempt a basic random forest on just the speed bucket, wind direction, and month
rfSmaller <- trainData %>%
    select(source, spdW, wd, monthfct) %>%
    randomForest::randomForest(source ~ spdW + wd + monthfct, data=., mtry=2, nodesize=25, ntree=100)
rfSmaller
## 
## Call:
##  randomForest(formula = source ~ spdW + wd + monthfct, data = .,      mtry = 2, nodesize = 25, ntree = 100) 
##                Type of random forest: classification
##                      Number of trees: 100
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 45.33%
## Confusion matrix:
##      klas klnk kord class.error
## klas 3897 1213 1057   0.3680882
## klnk 1567 3196 1393   0.4808317
## kord 1478 1672 3012   0.5111977
# Print accuracy and confidence
rfSmallerAccData <- evalWindPredictions(rfSmaller, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 4092 1448 1335
##       klnk 1111 3512 1404
##       kord  964 1196 3423
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5965          
##                  95% CI : (0.5894, 0.6036)
##     No Information Rate : 0.3336          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3948          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6635      0.5705      0.5555
## Specificity               0.7741      0.7960      0.8247
## Pos Pred Value            0.5952      0.5827      0.6131
## Neg Pred Value            0.8213      0.7878      0.7877
## Prevalence                0.3336      0.3330      0.3334
## Detection Rate            0.2214      0.1900      0.1852
## Detection Prevalence      0.3719      0.3260      0.3020
## Balanced Accuracy         0.7188      0.6833      0.6901
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.819      0.88     0.458     0.0488
## 2 klnk     0.784      0.82     0.331     0.0481
## 3 kord     0.782      0.82     0.339     0.0518
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35       14    42      0.333
##  2     0.4        65   188      0.346
##  3     0.45      138   334      0.413
##  4     0.5       502  1193      0.421
##  5     0.55      528  1154      0.458
##  6     0.6       628  1324      0.474
##  7     0.65      511  1011      0.505
##  8     0.7       639  1253      0.510
##  9     0.75      719  1285      0.560
## 10     0.8       735  1295      0.568
## 11     0.85      893  1470      0.607
## 12     0.9      1089  1699      0.641
## 13     0.95     1589  2289      0.694
## 14     1        2977  3948      0.754
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0439

rfSmallerAccData <- evalWindPredictions(rfSmaller, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 1600  683  619
##       klnk  556 1349  694
##       kord  476  606 1339
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5413          
##                  95% CI : (0.5302, 0.5523)
##     No Information Rate : 0.3348          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.312           
##                                           
##  Mcnemar's Test P-Value : 3.353e-08       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6079      0.5114      0.5049
## Specificity               0.7539      0.7634      0.7947
## Pos Pred Value            0.5513      0.5190      0.5531
## Neg Pred Value            0.7944      0.7578      0.7613
## Prevalence                0.3322      0.3330      0.3348
## Detection Rate            0.2020      0.1703      0.1690
## Detection Prevalence      0.3663      0.3281      0.3056
## Balanced Accuracy         0.6809      0.6374      0.6498
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.807      0.86     0.437     0.0642
## 2 klnk     0.773      0.8      0.304     0.0576
## 3 kord     0.782      0.81     0.331     0.0494
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35        4    18      0.222
##  2     0.4        34    94      0.362
##  3     0.45       78   183      0.426
##  4     0.5       216   536      0.403
##  5     0.55      202   513      0.394
##  6     0.6       247   584      0.423
##  7     0.65      188   408      0.461
##  8     0.7       268   543      0.494
##  9     0.75      292   597      0.489
## 10     0.8       299   565      0.529
## 11     0.85      346   636      0.544
## 12     0.9       415   701      0.592
## 13     0.95      547   900      0.608
## 14     1        1152  1644      0.701
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0675

It is encouraging that the accuracy stayed flat or even slightly improved, while the confidence in the predictions is reduced.

Perhaps the caret::train() functionality can help drill-down on some better parameters. Since there are very few variables, there are only 3 possible combinations with mtry=2. So, even ntree=50 should provide plenty of opportunity to explore the variable and observations spaces. Modelling is switched to ranger::ranger() with slight change in parameter names.

trGrid <- expand.grid(min.node.size=c(1, 5, 10, 25, 100), mtry=c(1, 2), splitrule=c("gini"))

caretModel <- caret::train(source ~ spdW + wd + monthfct, 
                           data=trainData,
                           method="ranger",
                           tuneGrid=trGrid,
                           trControl=caret::trainControl(method="cv", number=5),
                           num.trees=50
                           )
caretModel
## Random Forest 
## 
## 18485 samples
##     3 predictor
##     3 classes: 'klas', 'klnk', 'kord' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 14787, 14787, 14789, 14788, 14789 
## Resampling results across tuning parameters:
## 
##   min.node.size  mtry  Accuracy   Kappa    
##     1            1     0.5081958  0.2622854
##     1            2     0.5177179  0.2765768
##     5            1     0.5053828  0.2580624
##     5            2     0.5150677  0.2726065
##    10            1     0.4946166  0.2418788
##    10            2     0.5195025  0.2792524
##    25            1     0.5034353  0.2551507
##    25            2     0.5208547  0.2812840
##   100            1     0.5055452  0.2582975
##   100            2     0.5156613  0.2734966
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 2, splitrule = gini
##  and min.node.size = 25.
# Run the best parameters from ranger in randomForest
caretBest <- randomForest::randomForest(source ~ spdW + wd + monthfct, 
                                        data=trainData,
                                        ntree=50, 
                                        nodesize=10, 
                                        mtry=2
                                        )

caretAccData <- evalWindPredictions(caretBest, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 4187 1461 1340
##       klnk 1048 3518 1305
##       kord  932 1177 3517
## 
## Overall Statistics
##                                        
##                Accuracy : 0.6071       
##                  95% CI : (0.6, 0.6141)
##     No Information Rate : 0.3336       
##     P-Value [Acc > NIR] : < 2.2e-16    
##                                        
##                   Kappa : 0.4106       
##                                        
##  Mcnemar's Test P-Value : < 2.2e-16    
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6789      0.5715      0.5708
## Specificity               0.7726      0.8091      0.8289
## Pos Pred Value            0.5992      0.5992      0.6251
## Neg Pred Value            0.8278      0.7909      0.7943
## Prevalence                0.3336      0.3330      0.3334
## Detection Rate            0.2265      0.1903      0.1903
## Detection Prevalence      0.3780      0.3176      0.3044
## Balanced Accuracy         0.7258      0.6903      0.6998
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.815      0.88     0.451     0.0535
## 2 klnk     0.778      0.8      0.321     0.0575
## 3 kord     0.781      0.8      0.339     0.0550
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35        8    26      0.308
##  2     0.4        76   199      0.382
##  3     0.45      205   532      0.385
##  4     0.5       574  1298      0.442
##  5     0.55      464  1005      0.462
##  6     0.6       677  1363      0.497
##  7     0.65      475   930      0.511
##  8     0.7       800  1440      0.556
##  9     0.75      648  1157      0.560
## 10     0.8       865  1450      0.597
## 11     0.85      668  1035      0.645
## 12     0.9      1316  2067      0.637
## 13     0.95     1235  1815      0.680
## 14     1        3211  4168      0.770
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0388

caretAccData <- evalWindPredictions(caretBest, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 1598  680  623
##       klnk  552 1314  693
##       kord  482  644 1336
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5362          
##                  95% CI : (0.5252, 0.5473)
##     No Information Rate : 0.3348          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3044          
##                                           
##  Mcnemar's Test P-Value : 3.088e-07       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.6071      0.4981      0.5038
## Specificity               0.7537      0.7644      0.7863
## Pos Pred Value            0.5508      0.5135      0.5426
## Neg Pred Value            0.7941      0.7531      0.7590
## Prevalence                0.3322      0.3330      0.3348
## Detection Rate            0.2017      0.1659      0.1686
## Detection Prevalence      0.3662      0.3230      0.3108
## Balanced Accuracy         0.6804      0.6312      0.6451
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.803      0.86     0.436     0.0619
## 2 klnk     0.767      0.78     0.297     0.0591
## 3 kord     0.774      0.8      0.319     0.0637
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35        6    13      0.462
##  2     0.4        34   103      0.330
##  3     0.45       83   244      0.340
##  4     0.5       262   624      0.420
##  5     0.55      205   450      0.456
##  6     0.6       266   648      0.410
##  7     0.65      199   399      0.499
##  8     0.7       285   598      0.477
##  9     0.75      203   466      0.436
## 10     0.8       366   658      0.556
## 11     0.85      253   473      0.535
## 12     0.9       485   834      0.582
## 13     0.95      453   777      0.583
## 14     1        1148  1635      0.702
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0682

There is still no significant gain in accuracy, and the overconfidence issue persists. Ultimtately, wind speed and direction by month provides only modest predictive power for whether the data are taken from Las Vegas, Lincoln, or Chicago. The sensitivities suggest better ability to correctly classify Las Vegas (63%), which is reasonable given that winds in Lincoln and Chicago will both tend to be wintry for chunks of the year.

Example #43: Predicting Locale Based on Additional METAR Data

Winds provide some differentiation between Las Vegas, Chicago, and Lincoln. Two additional areas to explore include:

  1. Can the model perform better if it only needs to differentiate two locales with the most differences in wind - Las Vegas and Chicago?
  2. Does adding temperature and dewpoint help further differentiate the locales?

Example code includes:

# Differentiating Las Vegas and Chicago
windData <- bind_rows(klas=klas2016Wind, kord=kord2016Wind, .id="source") %>%
    mutate(source=factor(source))
windData
## # A tibble: 17,588 x 9
##    source valid               dirW   spdW spdBucket wd    predomDir month
##    <fct>  <dttm>              <chr> <int> <fct>     <fct> <fct>     <dbl>
##  1 klas   2015-12-31 00:56:00 100       3 Light     E     East         12
##  2 klas   2015-12-31 01:56:00 010       5 Light     N     North        12
##  3 klas   2015-12-31 02:56:00 010       5 Light     N     North        12
##  4 klas   2015-12-31 03:56:00 340       5 Light     NNW   North        12
##  5 klas   2015-12-31 04:56:00 210       3 Light     SSW   South        12
##  6 klas   2015-12-31 05:56:00 230       3 Light     WSW   West         12
##  7 klas   2015-12-31 06:56:00 230       3 Light     WSW   West         12
##  8 klas   2015-12-31 07:56:00 000       0 None      None  None         12
##  9 klas   2015-12-31 08:56:00 000       0 None      None  None         12
## 10 klas   2015-12-31 09:56:00 210       3 Light     SSW   South        12
## # ... with 17,578 more rows, and 1 more variable: monthfct <fct>
set.seed(2005071256)
trainIdx <- sample(1:nrow(windData), round(0.7*nrow(windData), 0), replace=FALSE) %>% sort()

trainData <- windData[trainIdx, ]
testData <- windData[-trainIdx, ]


# Create a tuning grid and run the models
trGrid <- expand.grid(min.node.size=c(1, 5, 10, 25, 100), mtry=c(1, 2), splitrule=c("gini"))

caretModel <- caret::train(source ~ spdW + wd + monthfct, 
                           data=trainData,
                           method="ranger",
                           tuneGrid=trGrid,
                           trControl=caret::trainControl(method="cv", number=5),
                           num.trees=50
                           )
caretModel
## Random Forest 
## 
## 12312 samples
##     3 predictor
##     2 classes: 'klas', 'kord' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 9850, 9850, 9850, 9849, 9849 
## Resampling results across tuning parameters:
## 
##   min.node.size  mtry  Accuracy   Kappa    
##     1            1     0.6478245  0.2951770
##     1            2     0.6792543  0.3585034
##     5            1     0.6524536  0.3046994
##     5            2     0.6725946  0.3450103
##    10            1     0.6510700  0.3016146
##    10            2     0.6660168  0.3319220
##    25            1     0.6536693  0.3066919
##    25            2     0.6707254  0.3413855
##   100            1     0.6445748  0.2886369
##   100            2     0.6750307  0.3500195
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 2, splitrule = gini
##  and min.node.size = 1.
# Run the best parameters from ranger in randomForest
caretBest <- randomForest::randomForest(source ~ spdW + wd + monthfct, 
                                        data=trainData,
                                        ntree=50, 
                                        nodesize=25, 
                                        mtry=2
                                        )
caretBest
## 
## Call:
##  randomForest(formula = source ~ spdW + wd + monthfct, data = trainData,      ntree = 50, nodesize = 25, mtry = 2) 
##                Type of random forest: classification
##                      Number of trees: 50
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 29.07%
## Confusion matrix:
##      klas kord class.error
## klas 4376 1755   0.2862502
## kord 1824 4357   0.2950979
caretAccData <- evalWindPredictions(caretBest, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas kord
##       klas 4632 1547
##       kord 1499 4634
##                                           
##                Accuracy : 0.7526          
##                  95% CI : (0.7449, 0.7602)
##     No Information Rate : 0.502           
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5052          
##                                           
##  Mcnemar's Test P-Value : 0.3944          
##                                           
##             Sensitivity : 0.7555          
##             Specificity : 0.7497          
##          Pos Pred Value : 0.7496          
##          Neg Pred Value : 0.7556          
##              Prevalence : 0.4980          
##          Detection Rate : 0.3762          
##    Detection Prevalence : 0.5019          
##       Balanced Accuracy : 0.7526          
##                                           
##        'Positive' Class : klas            
##                                           
## # A tibble: 2 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.874      0.96     0.573          0
## 2 kord     0.872      0.94     0.556          0
## # A tibble: 11 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.5       189   367      0.515
##  2     0.55      263   470      0.560
##  3     0.6       346   625      0.554
##  4     0.65      192   319      0.602
##  5     0.7       395   668      0.591
##  6     0.75      317   495      0.640
##  7     0.8       646   939      0.688
##  8     0.85      487   707      0.689
##  9     0.9       891  1208      0.738
## 10     0.95      936  1162      0.806
## 11     1        4604  5352      0.860
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0168

caretAccData <- evalWindPredictions(caretBest, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas kord
##       klas 1882  778
##       kord  770 1846
##                                           
##                Accuracy : 0.7066          
##                  95% CI : (0.6941, 0.7189)
##     No Information Rate : 0.5027          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.4132          
##                                           
##  Mcnemar's Test P-Value : 0.8588          
##                                           
##             Sensitivity : 0.7097          
##             Specificity : 0.7035          
##          Pos Pred Value : 0.7075          
##          Neg Pred Value : 0.7057          
##              Prevalence : 0.5027          
##          Detection Rate : 0.3567          
##    Detection Prevalence : 0.5042          
##       Balanced Accuracy : 0.7066          
##                                           
##        'Positive' Class : klas            
##                                           
## # A tibble: 2 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.875      0.94     0.566          0
## 2 kord     0.865      0.92     0.537          0
## # A tibble: 11 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.5        73   139      0.525
##  2     0.55      105   195      0.538
##  3     0.6       159   291      0.546
##  4     0.65       78   144      0.542
##  5     0.7       188   309      0.608
##  6     0.75      121   229      0.528
##  7     0.8       254   391      0.650
##  8     0.85      192   304      0.632
##  9     0.9       392   566      0.693
## 10     0.95      391   541      0.723
## 11     1        1775  2167      0.819
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0309

Accuracy increases to 71%, compared with a baseline accuracy of 51% based on majority class. So, the model is taking about 40% of the baseline errors and making accuract predictions for them.

The previous accuracy in predicting among 3 cities was 54%, compared with a baselne accurcay of 34% for the majority class. So, the old models were taking about 30% of the baseline erros and making accuract predictions for them.

Is it in fact harder to differentiate Lincoln and Chicago?

# Differentiating Lincoln and Chicago
windData <- bind_rows(klnk=klnk2016Wind, kord=kord2016Wind, .id="source") %>%
    mutate(source=factor(source))
windData
## # A tibble: 17,595 x 9
##    source valid               dirW   spdW spdBucket wd    predomDir month
##    <fct>  <dttm>              <chr> <int> <fct>     <fct> <fct>     <dbl>
##  1 klnk   2015-12-31 00:54:00 300       5 Light     WNW   West         12
##  2 klnk   2015-12-31 01:54:00 000       0 None      None  None         12
##  3 klnk   2015-12-31 02:54:00 000       0 None      None  None         12
##  4 klnk   2015-12-31 03:54:00 280       3 Light     W     West         12
##  5 klnk   2015-12-31 04:54:00 310       5 Light     WNW   West         12
##  6 klnk   2015-12-31 05:54:00 010       9 Moderate  N     North        12
##  7 klnk   2015-12-31 06:54:00 000       0 None      None  None         12
##  8 klnk   2015-12-31 07:54:00 010       3 Light     N     North        12
##  9 klnk   2015-12-31 08:54:00 000       0 None      None  None         12
## 10 klnk   2015-12-31 09:54:00 000       0 None      None  None         12
## # ... with 17,585 more rows, and 1 more variable: monthfct <fct>
set.seed(2005071306)
trainIdx <- sample(1:nrow(windData), round(0.7*nrow(windData), 0), replace=FALSE) %>% sort()

trainData <- windData[trainIdx, ]
testData <- windData[-trainIdx, ]


# Create a tuning grid and run the models
trGrid <- expand.grid(min.node.size=c(1, 5, 10, 25, 100), mtry=c(1, 2), splitrule=c("gini"))

caretModel <- caret::train(source ~ spdW + wd + monthfct, 
                           data=trainData,
                           method="ranger",
                           tuneGrid=trGrid,
                           trControl=caret::trainControl(method="cv", number=5),
                           num.trees=50
                           )
caretModel
## Random Forest 
## 
## 12316 samples
##     3 predictor
##     2 classes: 'klnk', 'kord' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 9853, 9853, 9852, 9853, 9853 
## Resampling results across tuning parameters:
## 
##   min.node.size  mtry  Accuracy   Kappa    
##     1            1     0.6336489  0.2672128
##     1            2     0.6462333  0.2923285
##     5            1     0.6416874  0.2832613
##     5            2     0.6452592  0.2903956
##    10            1     0.6409559  0.2815845
##    10            2     0.6458275  0.2915312
##    25            1     0.6411177  0.2821489
##    25            2     0.6464771  0.2927927
##   100            1     0.6363274  0.2726410
##   100            2     0.6450156  0.2898885
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 2, splitrule = gini
##  and min.node.size = 25.
# Run the best parameters from ranger in randomForest
caretBest <- randomForest::randomForest(source ~ spdW + wd + monthfct, 
                                        data=trainData,
                                        ntree=50, 
                                        nodesize=100, 
                                        mtry=2
                                        )
caretBest
## 
## Call:
##  randomForest(formula = source ~ spdW + wd + monthfct, data = trainData,      ntree = 50, nodesize = 100, mtry = 2) 
##                Type of random forest: classification
##                      Number of trees: 50
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 33.79%
## Confusion matrix:
##      klnk kord class.error
## klnk 4365 1812   0.2933463
## kord 2350 3789   0.3827985
caretAccData <- evalWindPredictions(caretBest, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klnk kord
##       klnk 4473 2221
##       kord 1704 3918
##                                          
##                Accuracy : 0.6813         
##                  95% CI : (0.673, 0.6895)
##     No Information Rate : 0.5015         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.3624         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.7241         
##             Specificity : 0.6382         
##          Pos Pred Value : 0.6682         
##          Neg Pred Value : 0.6969         
##              Prevalence : 0.5015         
##          Detection Rate : 0.3632         
##    Detection Prevalence : 0.5435         
##       Balanced Accuracy : 0.6812         
##                                          
##        'Positive' Class : klnk           
##                                          
## # A tibble: 2 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klnk     0.857      0.92     0.507          0
## 2 kord     0.858      0.92     0.512          0
## # A tibble: 11 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.5       164   338      0.485
##  2     0.55      206   374      0.551
##  3     0.6       331   672      0.493
##  4     0.65      316   568      0.556
##  5     0.7       456   786      0.580
##  6     0.75      333   561      0.594
##  7     0.8       650  1089      0.597
##  8     0.85      497   754      0.659
##  9     0.9       905  1396      0.648
## 10     0.95     1042  1456      0.716
## 11     1        3491  4322      0.808
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0356

caretAccData <- evalWindPredictions(caretBest, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klnk kord
##       klnk 1849  978
##       kord  764 1688
##                                           
##                Accuracy : 0.67            
##                  95% CI : (0.6571, 0.6827)
##     No Information Rate : 0.505           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3405          
##                                           
##  Mcnemar's Test P-Value : 3.337e-07       
##                                           
##             Sensitivity : 0.7076          
##             Specificity : 0.6332          
##          Pos Pred Value : 0.6541          
##          Neg Pred Value : 0.6884          
##              Prevalence : 0.4950          
##          Detection Rate : 0.3503          
##    Detection Prevalence : 0.5355          
##       Balanced Accuracy : 0.6704          
##                                           
##        'Positive' Class : klnk            
##                                           
## # A tibble: 2 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klnk     0.859      0.92     0.518          0
## 2 kord     0.860      0.92     0.512          0
## # A tibble: 11 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.5        67   158      0.424
##  2     0.55       84   159      0.528
##  3     0.6       141   258      0.547
##  4     0.65      133   236      0.564
##  5     0.7       211   346      0.610
##  6     0.75      131   234      0.560
##  7     0.8       245   420      0.583
##  8     0.85      226   354      0.638
##  9     0.9       387   619      0.625
## 10     0.95      465   629      0.739
## 11     1        1447  1866      0.775
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0415

Accuracy dips to 66%, so it is marginally harder to differentiate Lincoln and Chicago (both wintry cities near the east-west center of the US) than it is to differentiate from Las Vegas (a western desert city that experiences a different form of winter).

Can adding temperature and dewpoint help with the differentiation? This factor will also tend to be much different in Las Vegas than in Lincoln or Chicago.

klnk2016WindTempDew <- plotWindData(klnk2016METAR, 
                                    subT="Lincoln, NE (2016)", 
                                    extraVars=c("TempF", "DewF"), 
                                    showPlots=FALSE
                                    )
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         351      324   213    NA
##  2 N           272      378   415    NA
##  3 NNE          62      240   178    NA
##  4 ENE          47      158   118    NA
##  5 E            16      117   122    NA
##  6 ESE          64      300   208    NA
##  7 SSE         245      455   342    NA
##  8 S           454      631   409    NA
##  9 SSW         193      262   150    NA
## 10 WSW          30      105   114    NA
## 11 W            43      133   139    NA
## 12 WNW         150      207   156    NA
## 13 Variable     NA       NA   114    NA
## 14 None         NA       NA    NA   875
klas2016WindTempDew <- plotWindData(klas2016METAR, 
                                    subT="Las Vegas, NV (2016)", 
                                    extraVars=c("TempF", "DewF"), 
                                    showPlots=FALSE
                                    )
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         159      102    85    NA
##  2 N            56      140   172    NA
##  3 NNE          55      202   235    NA
##  4 ENE          35      189   301    NA
##  5 E             7      128   254    NA
##  6 ESE           3       35   140    NA
##  7 SSE          14      161   181    NA
##  8 S           140      617   605    NA
##  9 SSW         215      405  1029    NA
## 10 WSW         110      196   377    NA
## 11 W            25       52   270    NA
## 12 WNW          17       47   127    NA
## 13 Variable     NA       NA   503    NA
## 14 None         NA       NA    NA  1394
kord2016WindTempDew <- plotWindData(kord2016METAR, 
                                    subT="Chicago, IL (2016)", 
                                    extraVars=c("TempF", "DewF"), 
                                    showPlots=FALSE
                                    )
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         166      274   176    NA
##  2 N            80      226   245    NA
##  3 NNE         127      352   179    NA
##  4 ENE          56      333   172    NA
##  5 E            27      274   170    NA
##  6 ESE          37      145    84    NA
##  7 SSE          42      263   201    NA
##  8 S           179      427   302    NA
##  9 SSW         217      406   298    NA
## 10 WSW         179      429   333    NA
## 11 W           286      490   265    NA
## 12 WNW         193      361   164    NA
## 13 Variable     NA       NA   137    NA
## 14 None         NA       NA    NA   510
# Full data for Lincoln, Chicago, Las Vegas
windTempDewData <- bind_rows(klnk=klnk2016WindTempDew,
                             kord=kord2016WindTempDew, 
                             klas=klas2016WindTempDew, 
                             .id="source"
                             ) %>%
    mutate(source=factor(source))
windTempDewData
## # A tibble: 26,378 x 11
##    source valid               dirW   spdW TempF  DewF spdBucket wd    predomDir
##    <fct>  <dttm>              <chr> <int> <dbl> <dbl> <fct>     <fct> <fct>    
##  1 klnk   2015-12-31 00:54:00 300       5  27.0  19.9 Light     WNW   West     
##  2 klnk   2015-12-31 01:54:00 000       0  26.1  19.9 None      None  None     
##  3 klnk   2015-12-31 02:54:00 000       0  27.0  19.9 None      None  None     
##  4 klnk   2015-12-31 03:54:00 280       3  27.0  21.0 Light     W     West     
##  5 klnk   2015-12-31 04:54:00 310       5  27.0  19.9 Light     WNW   West     
##  6 klnk   2015-12-31 05:54:00 010       9  21.0  14   Moderate  N     North    
##  7 klnk   2015-12-31 06:54:00 000       0  19.0  12.0 None      None  None     
##  8 klnk   2015-12-31 07:54:00 010       3  18.0  12.0 Light     N     North    
##  9 klnk   2015-12-31 08:54:00 000       0  14    10.0 None      None  None     
## 10 klnk   2015-12-31 09:54:00 000       0  16.0  10.9 None      None  None     
## # ... with 26,368 more rows, and 2 more variables: month <dbl>, monthfct <fct>
set.seed(2005071329)
trainIdx <- sample(1:nrow(windTempDewData), round(0.7*nrow(windTempDewData), 0), replace=FALSE) %>% sort()

trainData <- windTempDewData[trainIdx, ]
testData <- windTempDewData[-trainIdx, ]


# Create a tuning grid and run the models
trGrid <- expand.grid(min.node.size=c(1, 5, 10, 25), mtry=c(1, 2, 3, 4, 5), splitrule=c("gini"))

caretModel <- caret::train(source ~ spdW + wd + monthfct + TempF + DewF, 
                           data=trainData,
                           method="ranger",
                           tuneGrid=trGrid,
                           trControl=caret::trainControl(method="cv", number=5),
                           num.trees=50
                           )
caretModel
## Random Forest 
## 
## 18465 samples
##     5 predictor
##     3 classes: 'klas', 'klnk', 'kord' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 14772, 14772, 14772, 14772, 14772 
## Resampling results across tuning parameters:
## 
##   min.node.size  mtry  Accuracy   Kappa    
##    1             1     0.5634444  0.3453425
##    1             2     0.6418088  0.4627716
##    1             3     0.7092878  0.5639086
##    1             4     0.7474140  0.6210909
##    1             5     0.7703222  0.6554637
##    5             1     0.5916057  0.3875219
##    5             2     0.6487950  0.4732196
##    5             3     0.7065800  0.5598324
##    5             4     0.7492012  0.6237761
##    5             5     0.7710263  0.6565183
##   10             1     0.5702139  0.3554148
##   10             2     0.6522610  0.4783923
##   10             3     0.7064717  0.5596798
##   10             4     0.7455727  0.6183328
##   10             5     0.7676144  0.6513979
##   25             1     0.5883022  0.3825844
##   25             2     0.6577850  0.4867405
##   25             3     0.7128080  0.5691824
##   25             4     0.7406445  0.6109352
##   25             5     0.7568373  0.6352346
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 5, splitrule = gini
##  and min.node.size = 5.
# Run the best parameters from ranger in randomForest
caretBest <- randomForest::randomForest(source ~ spdW + wd + monthfct + TempF + DewF, 
                                        data=trainData,
                                        ntree=50, 
                                        nodesize=1, 
                                        mtry=5
                                        )
caretBest
## 
## Call:
##  randomForest(formula = source ~ spdW + wd + monthfct + TempF +      DewF, data = trainData, ntree = 50, nodesize = 1, mtry = 5) 
##                Type of random forest: classification
##                      Number of trees: 50
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 18.29%
## Confusion matrix:
##      klas klnk kord class.error
## klas 5717  256  147  0.06584967
## klnk  269 4629 1285  0.25133430
## kord  131 1290 4741  0.23060695
caretAccData <- evalWindPredictions(caretBest, testData=trainData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 6113    1    2
##       klnk    7 6170   58
##       kord    0   12 6102
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9957          
##                  95% CI : (0.9946, 0.9966)
##     No Information Rate : 0.3348          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9935          
##                                           
##  Mcnemar's Test P-Value : 5.252e-08       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.9989      0.9979      0.9903
## Specificity               0.9998      0.9947      0.9990
## Pos Pred Value            0.9995      0.9896      0.9980
## Neg Pred Value            0.9994      0.9989      0.9951
## Prevalence                0.3314      0.3348      0.3337
## Detection Rate            0.3311      0.3341      0.3305
## Detection Prevalence      0.3312      0.3377      0.3311
## Balanced Accuracy         0.9993      0.9963      0.9946
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.960      1        0.840   0.000327
## 2 klnk     0.874      0.9      0.458   0       
## 3 kord     0.885      0.92     0.503   0       
## # A tibble: 12 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.45        1     1      1    
##  2     0.5        14    20      0.7  
##  3     0.55       58    74      0.784
##  4     0.6       225   253      0.889
##  5     0.65      337   346      0.974
##  6     0.7       832   845      0.985
##  7     0.75      843   845      0.998
##  8     0.8      1761  1764      0.998
##  9     0.85     1469  1471      0.999
## 10     0.9      2820  2821      1.00 
## 11     0.95     2547  2547      1    
## 12     1        7478  7478      1    
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0173

caretAccData <- evalWindPredictions(caretBest, testData=testData)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 2508  121   51
##       klnk   88 2007  520
##       kord   67  479 2072
## 
## Overall Statistics
##                                          
##                Accuracy : 0.8324         
##                  95% CI : (0.824, 0.8406)
##     No Information Rate : 0.3365         
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.7486         
##                                          
##  Mcnemar's Test P-Value : 0.02847        
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.9418      0.7699      0.7840
## Specificity               0.9672      0.8854      0.8964
## Pos Pred Value            0.9358      0.7675      0.7914
## Neg Pred Value            0.9704      0.8867      0.8922
## Prevalence                0.3365      0.3295      0.3340
## Detection Rate            0.3169      0.2536      0.2618
## Detection Prevalence      0.3387      0.3305      0.3308
## Balanced Accuracy         0.9545      0.8276      0.8402
## # A tibble: 3 x 5
##   source meanMax medianMax pct90Plus pct50Minus
##   <fct>    <dbl>     <dbl>     <dbl>      <dbl>
## 1 klas     0.919      1        0.760     0.0308
## 2 klnk     0.756      0.76     0.225     0.0353
## 3 kord     0.762      0.78     0.247     0.0348
## # A tibble: 14 x 4
##    predProb nCorrect  nObs pctCorrect
##       <dbl>    <int> <int>      <dbl>
##  1     0.35        5    15      0.333
##  2     0.4        30    71      0.423
##  3     0.45       54    94      0.574
##  4     0.5       240   430      0.558
##  5     0.55      231   392      0.589
##  6     0.6       390   614      0.635
##  7     0.65      280   405      0.691
##  8     0.7       456   595      0.766
##  9     0.75      325   411      0.791
## 10     0.8       529   645      0.820
## 11     0.85      408   469      0.870
## 12     0.9       719   789      0.911
## 13     0.95      587   627      0.936
## 14     1        2333  2356      0.990
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1    0.00116

The model performs well even with parameters that would tend to lead to high variance - using all the variables in mtry and allowing a minimum node size of 1. Despite this, confidence in classifying the test data is roughly in line with actual predictive power.

The overall prediction accuracy in the training data is 100% due to parameters that drive high variance and lack of any CV. The ranger::ranger() call was used to establish the parameters, so this is not, in and of itself, overly concerning.

The overall prediction accuracy in the test data is 82%, consistent with the randomForest::randomForest() estimate of 18% OOB error. The model is spectacular with Las Vegas, achieving 93% sensitivity and 97% specificity. The model also does well differentiating Lincoln and Chicago, achieving sensitivities in the 75%-80% range and specificities just under 90%.

caretBest %>%
    varImp() %>%
    rownames_to_column() %>%
    mutate(var=factor(rowname)) %>%
    ggplot(aes(x=fct_reorder(rowname, -Overall), y=Overall)) + 
    geom_col() + 
    labs(title="Temperature and Dew Point Best Separate Las Vegas, Lincoln, and Chicago", 
         subtitle="Variable Importance in Random Forest", 
         x="", 
         y="VarImp"
         )

So, temperature and dewpoint by month are significant helps in differentiating among the three cities.

Example #44: Function for Locale Predictions

The locale predictions can be converted to functional form. Functions include:

  • integrateLocaleData - bind rows for existing locale data
  • createTrainTestData - split a full dataset in to training and testing data
  • predictLocale - test user-specified options for mtry and min.node.size, pick the best, and report accuracy when applied to the training and testing data

Example code includes:

# Function to bind rows of any number of processed METAR files
integrateLocaleData <- function(...) {

    localeData <- bind_rows(..., .id="source") %>%
        mutate(source=factor(source))
    
    localeData
    
}


# Function to split an integrated file in to test and train
createTrainTestData <- function(df, trainSize=0.7, seed=NULL) {
    
    if (!is.null(seed)) { 
        set.seed(seed)
    }
    
    trainIdx <- sample(1:nrow(df), round(trainSize*nrow(df), 0), replace=FALSE) %>% 
        sort()
    
    trainData <- df[trainIdx, ]
    testData <- df[-trainIdx, ]
    
    list(trainData=trainData, testData=testData, trainSize=trainSize, seed=seed)
}

predictLocale <- function(predFormula,
                          listTestTrain, 
                          mns=c(1, 5, 10, 25, 100), 
                          mtry=NULL,
                          ntree=50, 
                          seed=NULL
                          ) {

    # Test all the possible variables in mtry if passed as NULL
    if (is.null(mtry)) {
        mtry <- 1:length(labels(terms(predFormula)))
    }
    
    # Create a tuning grid and run the models
    trGrid <- expand.grid(min.node.size=mns, mtry=mtry, splitrule=c("gini"))

    # If the seed has been provided, apply it before running the first model
    if (!is.null(seed)) {
        set.seed(seed)
    }
    
    # Run the training process using the training data
    caretModel <- caret::train(predFormula, 
                               data=listTestTrain$trainData,
                               method="ranger",
                               tuneGrid=trGrid,
                               trControl=caret::trainControl(method="cv", number=5),
                               num.trees=ntree
                               )
    print(caretModel)
    
    # Extract the best parameters
    cat("\nThe best parameters will be used:\n")
    print(caretModel$bestTune)
    
    # Run the best parameters from ranger in randomForest
    caretBest <- randomForest::randomForest(predFormula, 
                                            data=listTestTrain$trainData,
                                            ntree=ntree, 
                                            nodesize=caretModel$bestTune$min.node.size, 
                                            mtry=caretModel$bestTune$mtry
                                            )
    print(caretBest)

    # Evaluate prediction power on training data and testing data
    trainAccData <- evalWindPredictions(caretBest, testData=listTestTrain$trainData, 
                                        printConfSummary=FALSE, printConfTable=FALSE
                                        )
    testAccData <- evalWindPredictions(caretBest, testData=listTestTrain$testData, 
                                       printConfSummary=FALSE, printConfTable=FALSE
                                       )
    
    trainAcc <- round(mean(trainAccData$accurate), 4)
    testAcc <- round(mean(testAccData$accurate), 4)
    
    # Print the overall accuracy of the predictions
    cat("\nTraining Accuracy:", trainAcc)
    cat("\nTesting Accuracy:", testAcc, "\n")
    
    list(formula=predFormula, 
         seed=seed,
         tuningModel=caretModel, 
         finalModel=caretBest, 
         trainAccData=trainAccData, 
         testAccData=testAccData,
         trainAcc=trainAcc,
         testAcc=testAcc
         )
    
}

The relevant dataset components can be created (no need to cache):

# Create the component datasets
klas2016 <- plotWindData(klas2016METAR, subT="", extraVars=c("TempF", "DewF"), showPlots=FALSE)
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         159      102    85    NA
##  2 N            56      140   172    NA
##  3 NNE          55      202   235    NA
##  4 ENE          35      189   301    NA
##  5 E             7      128   254    NA
##  6 ESE           3       35   140    NA
##  7 SSE          14      161   181    NA
##  8 S           140      617   605    NA
##  9 SSW         215      405  1029    NA
## 10 WSW         110      196   377    NA
## 11 W            25       52   270    NA
## 12 WNW          17       47   127    NA
## 13 Variable     NA       NA   503    NA
## 14 None         NA       NA    NA  1394
klnk2016 <- plotWindData(klnk2016METAR, subT="", extraVars=c("TempF", "DewF"), showPlots=FALSE)
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         351      324   213    NA
##  2 N           272      378   415    NA
##  3 NNE          62      240   178    NA
##  4 ENE          47      158   118    NA
##  5 E            16      117   122    NA
##  6 ESE          64      300   208    NA
##  7 SSE         245      455   342    NA
##  8 S           454      631   409    NA
##  9 SSW         193      262   150    NA
## 10 WSW          30      105   114    NA
## 11 W            43      133   139    NA
## 12 WNW         150      207   156    NA
## 13 Variable     NA       NA   114    NA
## 14 None         NA       NA    NA   875
kord2016 <- plotWindData(kord2016METAR, subT="", extraVars=c("TempF", "DewF"), showPlots=FALSE)
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         166      274   176    NA
##  2 N            80      226   245    NA
##  3 NNE         127      352   179    NA
##  4 ENE          56      333   172    NA
##  5 E            27      274   170    NA
##  6 ESE          37      145    84    NA
##  7 SSE          42      263   201    NA
##  8 S           179      427   302    NA
##  9 SSW         217      406   298    NA
## 10 WSW         179      429   333    NA
## 11 W           286      490   265    NA
## 12 WNW         193      361   164    NA
## 13 Variable     NA       NA   137    NA
## 14 None         NA       NA    NA   510
kdtw2016 <- plotWindData(kdtw2016METAR, subT="", extraVars=c("TempF", "DewF"), showPlots=FALSE)
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW          54      186   219    NA
##  2 N            43      226   243    NA
##  3 NNE          19      231   314    NA
##  4 ENE           8      159   198    NA
##  5 E             4      122   239    NA
##  6 ESE           2       98   220    NA
##  7 SSE          22      258   335    NA
##  8 S           111      339   453    NA
##  9 SSW         324      571   284    NA
## 10 WSW         272      500   212    NA
## 11 W           187      371   204    NA
## 12 WNW         146      330   241    NA
## 13 Variable     NA       NA   167    NA
## 14 None         NA       NA    NA   857
kmsp2016 <- plotWindData(kmsp2016METAR, subT="", extraVars=c("TempF", "DewF"), showPlots=FALSE)
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         251      389   169    NA
##  2 N            77      315   197    NA
##  3 NNE          32      214   146    NA
##  4 ENE          46      139   106    NA
##  5 E            47      193   135    NA
##  6 ESE         137      374   332    NA
##  7 SSE         144      621   403    NA
##  8 S           155      361   213    NA
##  9 SSW          52      298   289    NA
## 10 WSW          61      288   290    NA
## 11 W           167      288   200    NA
## 12 WNW         262      447   219    NA
## 13 Variable     NA       NA   215    NA
## 14 None         NA       NA    NA   538
kord2015 <- plotWindData(kord2015METAR, subT="", extraVars=c("TempF", "DewF"), showPlots=FALSE)
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         138      251   110    NA
##  2 N            64      203   205    NA
##  3 NNE         196      376   230    NA
##  4 ENE         116      413   176    NA
##  5 E            17      239   136    NA
##  6 ESE          23       77    87    NA
##  7 SSE          68      194   201    NA
##  8 S           189      398   311    NA
##  9 SSW         235      521   339    NA
## 10 WSW         187      517   330    NA
## 11 W           278      416   271    NA
## 12 WNW         174      299   163    NA
## 13 Variable     NA       NA   102    NA
## 14 None         NA       NA    NA   518
kord2017 <- plotWindData(kord2017METAR, subT="", extraVars=c("TempF", "DewF"), showPlots=FALSE)
## # A tibble: 14 x 5
##    wd       Strong Moderate Light  None
##    <fct>     <int>    <int> <int> <int>
##  1 NNW         128      373   159    NA
##  2 N            75      223   247    NA
##  3 NNE         211      358   210    NA
##  4 ENE          74      371   211    NA
##  5 E            20      250   155    NA
##  6 ESE          20      107    79    NA
##  7 SSE          45      249   172    NA
##  8 S           255      366   271    NA
##  9 SSW         235      354   231    NA
## 10 WSW         174      380   256    NA
## 11 W           267      503   271    NA
## 12 WNW         238      493   183    NA
## 13 Variable     NA       NA    99    NA
## 14 None         NA       NA    NA   464

Subsets of the components are then integrated to full datasets, with training and testing data created (no need to cache):

# Create the full 2016 dataset for Lincoln, Chicago, Las Vegas
fullDF001 <- integrateLocaleData(klnk=klnk2016,
                                 kord=kord2016, 
                                 klas=klas2016
                                 )

# Create the train data and the test data for the full 2016 dataset for Lincoln, Chicago, Las Vegas
listTestTrain001 <- createTrainTestData(fullDF001, trainSize=0.7, seed=2005071329)


# Create the full 2016 dataset for Chicago, Detroit, Minneapolis
fullDF002 <- integrateLocaleData(kord=kord2016,
                                 kdtw=kdtw2016, 
                                 kmsp=kmsp2016
                                 )

# Create the train data and the test data for the full 2016 dataset for Chicago, Detroit, Minneapolis
listTestTrain002 <- createTrainTestData(fullDF002, trainSize=0.7, seed=2005081434)


# Create the full dataset for Chicago 2015, 2016, 2017
fullDF003 <- integrateLocaleData(kord2015=kord2015,
                                 kord2016=kord2016, 
                                 kord2017=kord2017
                                 )

# Create the train data and the test data for the full 2016 dataset for Chicago, Detroit, Minneapolis
listTestTrain003 <- createTrainTestData(fullDF003, trainSize=0.7, seed=2005081436)

The modeling is re-run using the function for Chicago, Las Vegas, Lincoln, with accuracies on the training and testing data reported (cached):

# Run the locale predictions
pL001 <- predictLocale(source ~ spdW + wd + monthfct + TempF + DewF, 
                       listTestTrain=listTestTrain001, 
                       seed=2005071329
                       )
## Random Forest 
## 
## 18465 samples
##     5 predictor
##     3 classes: 'klas', 'klnk', 'kord' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 14772, 14773, 14772, 14771, 14772 
## Resampling results across tuning parameters:
## 
##   min.node.size  mtry  Accuracy   Kappa    
##     1            1     0.5748734  0.3623643
##     1            2     0.6492291  0.4738774
##     1            3     0.7125913  0.5688709
##     1            4     0.7462227  0.6193083
##     1            5     0.7712966  0.6569230
##     5            1     0.5848365  0.3774489
##     5            2     0.6445166  0.4668218
##     5            3     0.7140537  0.5710467
##     5            4     0.7456268  0.6184121
##     5            5     0.7715678  0.6573294
##    10            1     0.5754696  0.3633040
##    10            2     0.6505292  0.4758399
##    10            3     0.7121566  0.5682115
##    10            4     0.7457349  0.6185706
##    10            5     0.7654483  0.6481474
##    25            1     0.5772526  0.3660704
##    25            2     0.6491749  0.4737832
##    25            3     0.7089636  0.5634191
##    25            4     0.7422694  0.6133791
##    25            5     0.7606280  0.6409245
##   100            1     0.5806124  0.3710322
##   100            2     0.6509599  0.4765185
##   100            3     0.7005676  0.5508212
##   100            4     0.7238561  0.5857616
##   100            5     0.7387489  0.6081059
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 5, splitrule = gini
##  and min.node.size = 5.
## 
## The best parameters will be used:
##    mtry splitrule min.node.size
## 10    5      gini             5
## 
## Call:
##  randomForest(formula = predFormula, data = listTestTrain$trainData,      ntree = ntree, nodesize = caretModel$bestTune$min.node.size,      mtry = caretModel$bestTune$mtry) 
##                Type of random forest: classification
##                      Number of trees: 50
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 18.47%
## Confusion matrix:
##      klas klnk kord class.error
## klas 5697  285  138  0.06911765
## klnk  263 4773 1147  0.22804464
## kord  147 1431 4584  0.25608569
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 6028   38   33
##       klnk   56 6012  293
##       kord   36  133 5836
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9681          
##                  95% CI : (0.9655, 0.9706)
##     No Information Rate : 0.3348          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9522          
##                                           
##  Mcnemar's Test P-Value : 9.651e-14       
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.9850      0.9723      0.9471
## Specificity               0.9942      0.9716      0.9863
## Pos Pred Value            0.9884      0.9451      0.9719
## Neg Pred Value            0.9926      0.9859      0.9738
## Prevalence                0.3314      0.3348      0.3337
## Detection Rate            0.3265      0.3256      0.3161
## Detection Prevalence      0.3303      0.3445      0.3252
## Balanced Accuracy         0.9896      0.9720      0.9667
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0204

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction klas klnk kord
##       klas 2503  110   52
##       klnk   97 2051  597
##       kord   63  446 1994
## 
## Overall Statistics
##                                          
##                Accuracy : 0.8275         
##                  95% CI : (0.819, 0.8358)
##     No Information Rate : 0.3365         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.7413         
##                                          
##  Mcnemar's Test P-Value : 2.845e-05      
## 
## Statistics by Class:
## 
##                      Class: klas Class: klnk Class: kord
## Sensitivity               0.9399      0.7867      0.7544
## Specificity               0.9691      0.8692      0.9034
## Pos Pred Value            0.9392      0.7472      0.7966
## Neg Pred Value            0.9695      0.8924      0.8800
## Prevalence                0.3365      0.3295      0.3340
## Detection Rate            0.3163      0.2592      0.2520
## Detection Prevalence      0.3368      0.3469      0.3163
## Balanced Accuracy         0.9545      0.8280      0.8289
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1    0.00153

## 
## Training Accuracy: 0.9681
## Testing Accuracy: 0.8275

As before, accuracy on the testing dataset is 82%, with very strong model performance in predicting Las Vegas and good but less accurate differentiation of Lincoln and Chicago.

Is the model largely pulling out that Las Vegas has a different winter? Running on Minneapolis, Chicago, and Detroit is interesting since they are all wintry cities with somewhat different dynamics. Minneapolis has the coldest and longest winters. Cached code includes:

# Run the locale predictions
pL002 <- predictLocale(source ~ spdW + wd + monthfct + TempF + DewF, 
                       listTestTrain=listTestTrain002, 
                       seed=2005081444
                       )
## Random Forest 
## 
## 18469 samples
##     5 predictor
##     3 classes: 'kdtw', 'kmsp', 'kord' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 14775, 14775, 14775, 14775, 14776 
## Resampling results across tuning parameters:
## 
##   min.node.size  mtry  Accuracy   Kappa    
##     1            1     0.4259572  0.1384627
##     1            2     0.4551409  0.1828420
##     1            3     0.4708972  0.2065741
##     1            4     0.4995394  0.2494533
##     1            5     0.5237962  0.2858817
##     5            1     0.4341869  0.1513336
##     5            2     0.4549244  0.1825309
##     5            3     0.4732792  0.2101005
##     5            4     0.4976988  0.2467224
##     5            5     0.5260166  0.2891615
##    10            1     0.4288271  0.1429517
##    10            2     0.4531378  0.1798605
##    10            3     0.4694891  0.2043884
##    10            4     0.4996477  0.2496694
##    10            5     0.5234715  0.2853441
##    25            1     0.4308301  0.1458654
##    25            2     0.4518383  0.1777939
##    25            3     0.4731709  0.2099476
##    25            4     0.4959117  0.2440834
##    25            5     0.5195730  0.2795249
##   100            1     0.4271487  0.1403408
##   100            2     0.4536250  0.1805066
##   100            3     0.4679193  0.2021086
##   100            4     0.4853537  0.2282381
##   100            5     0.4977527  0.2468224
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 5, splitrule = gini
##  and min.node.size = 5.
## 
## The best parameters will be used:
##    mtry splitrule min.node.size
## 10    5      gini             5
## 
## Call:
##  randomForest(formula = predFormula, data = listTestTrain$trainData,      ntree = ntree, nodesize = caretModel$bestTune$min.node.size,      mtry = caretModel$bestTune$mtry) 
##                Type of random forest: classification
##                      Number of trees: 50
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 38.07%
## Confusion matrix:
##      kdtw kmsp kord class.error
## kdtw 3859  993 1272   0.3698563
## kmsp 1088 4029 1025   0.3440247
## kord 1458 1195 3550   0.4276963
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction kdtw kmsp kord
##       kdtw 5804  185  242
##       kmsp  138 5819  219
##       kord  182  138 5742
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9402          
##                  95% CI : (0.9367, 0.9436)
##     No Information Rate : 0.3359          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9103          
##                                           
##  Mcnemar's Test P-Value : 2.284e-07       
## 
## Statistics by Class:
## 
##                      Class: kdtw Class: kmsp Class: kord
## Sensitivity               0.9477      0.9474      0.9257
## Specificity               0.9654      0.9710      0.9739
## Pos Pred Value            0.9315      0.9422      0.9472
## Neg Pred Value            0.9739      0.9737      0.9628
## Prevalence                0.3316      0.3326      0.3359
## Detection Rate            0.3143      0.3151      0.3109
## Detection Prevalence      0.3374      0.3344      0.3282
## Balanced Accuracy         0.9566      0.9592      0.9498
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0493

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction kdtw kmsp kord
##       kdtw 1687  462  601
##       kmsp  435 1767  482
##       kord  523  439 1519
## 
## Overall Statistics
##                                          
##                Accuracy : 0.6283         
##                  95% CI : (0.6175, 0.639)
##     No Information Rate : 0.3371         
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.4423         
##                                          
##  Mcnemar's Test P-Value : 0.04143        
## 
## Statistics by Class:
## 
##                      Class: kdtw Class: kmsp Class: kord
## Sensitivity               0.6378      0.6623      0.5838
## Specificity               0.7983      0.8252      0.8189
## Pos Pred Value            0.6135      0.6583      0.6123
## Neg Pred Value            0.8145      0.8278      0.8007
## Prevalence                0.3342      0.3371      0.3287
## Detection Rate            0.2131      0.2232      0.1919
## Detection Prevalence      0.3474      0.3391      0.3135
## Balanced Accuracy         0.7180      0.7438      0.7014
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1   0.000609

## 
## Training Accuracy: 0.9402
## Testing Accuracy: 0.6283

Accuracy on the testing dataset dips to 63%, with the model being roughly as good in predicting each of the cities (sensitivity for Minneapolis is 5% higher than the others, specificity is largely the same for all). This is consistent with the 38% OOB error estimate in the final random forest model. Prediction confidence on the training data is well aligned with actual prediction accuracy.

It is also interesting to consider whether the model has any accuracy in distinguishing among the years for the same city. Chicago has a variable climate over the medium-term, so the model is run on the full-year 2015, 2016, and 2017 data for Chicago. Cached code includes:

# Run the locale predictions
pL003 <- predictLocale(source ~ spdW + wd + monthfct + TempF + DewF, 
                       listTestTrain=listTestTrain003, 
                       seed=2005081447
                       )
## Random Forest 
## 
## 18445 samples
##     5 predictor
##     3 classes: 'kord2015', 'kord2016', 'kord2017' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 14756, 14756, 14757, 14756, 14755 
## Resampling results across tuning parameters:
## 
##   min.node.size  mtry  Accuracy   Kappa     
##     1            1     0.3910546  0.08578152
##     1            2     0.4464090  0.16931956
##     1            3     0.4994864  0.24904905
##     1            4     0.5459483  0.31886013
##     1            5     0.5854692  0.37816498
##     5            1     0.3926293  0.08827569
##     5            2     0.4441876  0.16601069
##     5            3     0.4973715  0.24593849
##     5            4     0.5471401  0.32063138
##     5            5     0.5851445  0.37768072
##    10            1     0.3847123  0.07633581
##    10            2     0.4429941  0.16417004
##    10            3     0.4976427  0.24631054
##    10            4     0.5405810  0.31081086
##    10            5     0.5815129  0.37224407
##    25            1     0.3933871  0.08946665
##    25            2     0.4479814  0.17167716
##    25            3     0.4986176  0.24777324
##    25            4     0.5442664  0.31633717
##    25            5     0.5760899  0.36409884
##   100            1     0.3866636  0.07931977
##   100            2     0.4397416  0.15934537
##   100            3     0.4800763  0.21993476
##   100            4     0.5178644  0.27672324
##   100            5     0.5438322  0.31570328
## 
## Tuning parameter 'splitrule' was held constant at a value of gini
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were mtry = 5, splitrule = gini
##  and min.node.size = 1.
## 
## The best parameters will be used:
##   mtry splitrule min.node.size
## 5    5      gini             1
## 
## Call:
##  randomForest(formula = predFormula, data = listTestTrain$trainData,      ntree = ntree, nodesize = caretModel$bestTune$min.node.size,      mtry = caretModel$bestTune$mtry) 
##                Type of random forest: classification
##                      Number of trees: 50
## No. of variables tried at each split: 5
## 
##         OOB estimate of  error rate: 32.98%
## Confusion matrix:
##          kord2015 kord2016 kord2017 class.error
## kord2015     4206     1011      931   0.3158751
## kord2016     1090     3985     1053   0.3497063
## kord2017      945     1053     4171   0.3238775
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction kord2015 kord2016 kord2017
##   kord2015     6106       48       35
##   kord2016       28     6051       40
##   kord2017       14       29     6094
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9895          
##                  95% CI : (0.9879, 0.9909)
##     No Information Rate : 0.3345          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9842          
##                                           
##  Mcnemar's Test P-Value : 0.001125        
## 
## Statistics by Class:
## 
##                      Class: kord2015 Class: kord2016 Class: kord2017
## Sensitivity                   0.9932          0.9874          0.9878
## Specificity                   0.9933          0.9945          0.9965
## Pos Pred Value                0.9866          0.9889          0.9930
## Neg Pred Value                0.9966          0.9938          0.9939
## Prevalence                    0.3333          0.3322          0.3345
## Detection Rate                0.3310          0.3281          0.3304
## Detection Prevalence          0.3355          0.3317          0.3327
## Balanced Accuracy             0.9932          0.9910          0.9922
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1     0.0316

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction kord2015 kord2016 kord2017
##   kord2015     1835      467      351
##   kord2016      446     1802      415
##   kord2017      339      408     1842
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6931          
##                  95% CI : (0.6828, 0.7033)
##     No Information Rate : 0.3386          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5396          
##                                           
##  Mcnemar's Test P-Value : 0.8611          
## 
## Statistics by Class:
## 
##                      Class: kord2015 Class: kord2016 Class: kord2017
## Sensitivity                   0.7004          0.6731          0.7063
## Specificity                   0.8452          0.8353          0.8590
## Pos Pred Value                0.6917          0.6767          0.7115
## Neg Pred Value                0.8505          0.8331          0.8559
## Prevalence                    0.3314          0.3386          0.3299
## Detection Rate                0.2321          0.2280          0.2330
## Detection Prevalence          0.3356          0.3369          0.3275
## Balanced Accuracy             0.7728          0.7542          0.7826
## 
## Mean Error-Squared Between Confidence of Prediction and Accuracy of Precition
## # A tibble: 1 x 1
##   meanError2
##        <dbl>
## 1   0.000496

## 
## Training Accuracy: 0.9895
## Testing Accuracy: 0.6931

Interestingly, the model is 69% accurate in classifying Chicago 2015-2016-2017, which is 5% higher than the model accuracy in classifying Chicago, Detroit, Minneapolis all in the same year (2016). Further extensions to look at cities with much less medium-term climate variation (perhaps coastal cities like San Diego or desert cities like Las Vegas) could be interesting as well.

Of note, the model does NOT have access to the day or the hour of observations, only to the month. So, the model is not currently using things like February 12, 2016 was very warm in city A for predictions. It only has access to Month, Wind Direction, Wind Speed, Temperature, and Dew Point.

Example #45: Relationship Between Prediction Accuracy and Variables

The variable importance suggest the temperature and dewpoint are the most important for differentiating among Chicago, Las Vegas, and Lincoln. Is this true for Chicago, Detroit, Minneapolis and also for Chicago 2015, 2016, 2017?

Example code includes:

pL001$finalModel %>%
    varImp() %>%
    rownames_to_column() %>%
    mutate(var=factor(rowname)) %>%
    ggplot(aes(x=fct_reorder(rowname, -Overall), y=Overall)) + 
    geom_col() + 
    labs(title="Temperature and Dew Point Best Separate Las Vegas, Lincoln, and Chicago", 
         subtitle="Variable Importance in Random Forest", 
         x="", 
         y="VarImp"
         )

pL002$finalModel %>%
    varImp() %>%
    rownames_to_column() %>%
    mutate(var=factor(rowname)) %>%
    ggplot(aes(x=fct_reorder(rowname, -Overall), y=Overall)) + 
    geom_col() + 
    labs(title="Temperature and Dew Point Best Separate Chicago, Detroit, and Minneapolis", 
         subtitle="Variable Importance in Random Forest", 
         x="", 
         y="VarImp"
         )

pL003$finalModel %>%
    varImp() %>%
    rownames_to_column() %>%
    mutate(var=factor(rowname)) %>%
    ggplot(aes(x=fct_reorder(rowname, -Overall), y=Overall)) + 
    geom_col() + 
    labs(title="All 5 Variables Help Differentiate Chicago 2015-2016-2017", 
         subtitle="Variable Importance in Random Forest", 
         x="", 
         y="VarImp"
         )

So, temperature and dewpoint appear to be key variables in each of the three models, with the Chicago 2015-2016-2017 modeling being somewhat more evenly influenced by all 5 factors.

The prediction accuracies for Chicago, Las Vegas, Lincoln can then be assessed across each of the underlying variables:

assessAccuracy1D <- function(accList, 
                             ttList, 
                             numVars, 
                             fctVars,
                             mapVarNames
                             ) {

    # Integrate the test set predictions and the raw data file
    fullTestData <- bind_cols(accList[["testAccData"]] %>% rename(accSource=source),
                              ttList[["testData"]]
                              )

    # Grab the test set prediction accuracy
    testAcc <- accList[["testAcc"]]

    # Confirm that sources are the same across the files
    fullTestData %>%
        count(accSource, source) %>%
        print()

    # Create point plots with smooths for accuracy by numeric variables
    for (numVar in numVars) {
        pl <- fullTestData %>%
            mutate(accurate=as.integer(accurate)) %>%
            group_by_at(vars(all_of(c(numVar, "accurate")))) %>%
            summarize(n=n()) %>%
            mutate(nAcc=n*accurate) %>%
            group_by_at(vars(all_of(numVar))) %>%
            summarize(n=sum(n), nAcc=sum(nAcc)) %>%
            mutate(pctAcc=nAcc/n) %>%
            ggplot(aes_string(y="pctAcc", x=numVar)) + 
            geom_point(aes(size=n)) +
            geom_smooth() +
            ylim(c(0, 1)) + 
            geom_hline(yintercept=testAcc, lty=2) +
            labs(y="% Accuracy", x=mapVarNames[numVar], 
                 subtitle=paste0("Overall Accuracy on Test Data: ", round(100*testAcc, 1), "%"),
                 title=paste0("Accuracy of Classifications on Test Data By ", mapVarNames[numVar])
                 )
        print(pl)
    }
    
    
    # Create bar plots for accuracy by factor variables
    for (fctVar in fctVars) {
        pl <- fullTestData %>%
            ggplot(aes_string(fill="accurate", x=fctVar)) + 
            geom_bar(position="fill") +
            geom_hline(yintercept=testAcc, lty=2) +
            labs(y="% Accuracy", x=mapVarNames[fctVar], 
                 subtitle=paste0("Overall Accuracy on Test Data: ", round(100*testAcc, 1), "%"),
                 title=paste0("Accuracy of Classifications on Test Data By ", mapVarNames[fctVar])
                 )
        print(pl)
    }
    
}


# Create a mapping for the variable names to better plotting names
mapVarNames <- c(DewF="Dew Point (F)", 
                 TempF="Temperature (F)", 
                 spdW="Wind Speed (kts)",
                 wd="Prevailing Wind Direction", 
                 monthfct="Month"
                 )

# Assess Accuracy by Dimension for Chicago, Las Vegas, Lincoln
assessAccuracy1D(pL001, 
                 listTestTrain001, 
                 numVars=c("spdW", "TempF", "DewF"), 
                 fctVars=c("wd", "monthfct"),
                 mapVarNames=mapVarNames
                 )
## # A tibble: 3 x 3
##   accSource source     n
##   <fct>     <fct>  <int>
## 1 klas      klas    2663
## 2 klnk      klnk    2607
## 3 kord      kord    2643
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 5 rows containing missing values (geom_smooth).

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Prediction accuracy is high for all values of the key modeled variables. There is some variation:

  • Model accuracy is slightly higher for wind speeds above 15 knots, though these are rare
  • Model accuracy is higher for extreme temperatures (under 20F and above 90F) than for modest temperatures
  • Model accuracy is higher for dew points under 20F and lower for dew points above 40F
  • Model accuracy is very similar by month and wind direction

It is interesting then to look at a 2D plot of temperatures and dew points:

listTestTrain001[["testData"]] %>%
    count(TempF, DewF, source) %>%
    ggplot(aes(x=TempF, y=DewF, size=n, color=source)) + 
    geom_point(alpha=0.3)

Las Vegas is very clearly differentiated from Chicago and Lincoln when looking at both dew point and temperature. In fact, how well would a 3-cluster k-means do in separating the data?

# Pull key data from the test-train file
keyData <- listTestTrain001[["testData"]] %>%
    select(TempF, DewF, source)

# Scale the numeric data for k-means
keyMatrix <- keyData %>%
    select(-source) %>%
    scale()

# Run k-means with 3 centers
kmModel <- kmeans(keyMatrix, centers=3)

# Augment the key data with the cluster results
keyData <- keyData %>%
    mutate(cluster=factor(kmModel$cluster))

# Summarize the clusters
keyData %>%
    group_by(cluster) %>%
    summarize(meanDewF=mean(DewF), meanTempF=mean(TempF), n=n(), 
              pctLAS=mean(source=="klas"), pctORD=mean(source=="kord"), pctLNK=mean(source=="klnk")
              )
## # A tibble: 3 x 7
##   cluster meanDewF meanTempF     n pctLAS pctORD pctLNK
##   <fct>      <dbl>     <dbl> <int>  <dbl>  <dbl>  <dbl>
## 1 1           28.0      79.1  1829  0.882 0.0377 0.0804
## 2 2           58.2      71.5  2945  0.113 0.438  0.449 
## 3 3           25.1      37.9  3139  0.228 0.409  0.363
# Plot the clusters
keyData %>%
    count(cluster, TempF, DewF) %>%
    ggplot(aes(x=TempF, y=DewF, size=n, color=cluster)) + 
    geom_point()

Much of the model’s success is from the observation that high temperature with low dew point occurs ~25% of the time and is almost always Las Vegas. Chicago and Lincoln tend to have temperatures and dew points that travel together, and a very simple k-means struggles to pull apart which is which.

Suppose that a different number of clusters were attempted - where is the elbow point?

kVals <- 1:20
bSS <- numeric(length(kVals))
wSS <- numeric(length(kVals))

for (x in seq_along(kVals)) {

    kmModel <- kmeans(keyMatrix, centers=kVals[x])
    bSS[x] <- kmModel$betweenss
    wSS[x] <- kmModel$tot.withinss
    
}

data.frame(centers=kVals, withinSS=wSS) %>%
    ggplot(aes(x=centers, y=withinSS)) + 
    geom_point() + 
    geom_line() + 
    labs(x="# Clusters", y="Total Within Sum-Squares") + 
    ylim(c(0, NA))

While there is no sharp elbow, there is some meaningful improvement out to 5 clusters. For curiosity, the model is run again for 5 clusters:

# Run k-means with 5 centers
kmModel <- kmeans(keyMatrix, centers=5)

# Augment the key data with the cluster results
keyData <- keyData %>%
    mutate(cluster=factor(kmModel$cluster))

# Summarize the clusters
keyData %>%
    group_by(cluster) %>%
    summarize(meanDewF=mean(DewF), meanTempF=mean(TempF), n=n(), 
              pctLAS=mean(source=="klas"), pctORD=mean(source=="kord"), pctLNK=mean(source=="klnk")
              )
## # A tibble: 5 x 7
##   cluster meanDewF meanTempF     n pctLAS  pctORD pctLNK
##   <fct>      <dbl>     <dbl> <int>  <dbl>   <dbl>  <dbl>
## 1 1           61.2      74.9  2320 0.0961 0.449   0.455 
## 2 2           30.3      90.0   969 0.951  0.00929 0.0392
## 3 3           21.5      63.3  1040 0.888  0.0346  0.0769
## 4 4           17.7      27.8  1521 0.104  0.456   0.440 
## 5 5           39.2      51.0  2063 0.211  0.418   0.371
# Plot the clusters
keyData %>%
    count(cluster, TempF, DewF) %>%
    ggplot(aes(x=TempF, y=DewF, size=n, color=cluster)) + 
    geom_point()

Expanding to 5 clusters does little to help break apart Lincoln and Chicago. There are just more buckets of temperature-dewpoint (high-high, low-low, higher and lower forms of medium-medium). And, Las Vegas has a cluster of very high temperature with low dew point (representing ~17% of the data) almost all to itself.

This suggests that while temperature and dew point are important, the random forest (which is about 75% accurate in separating Chicago and Lincoln) either takes advantage of the month and wind data also, or using sophisticated tree splits to tease out more granular relationships between temperature and dew point.